Merge branch 'master' into pr-local-plan

Igor Nikonov 2024-09-03 22:43:20 +02:00 committed by GitHub
commit 8629f7e592
77 changed files with 2310 additions and 356 deletions

View File

@@ -18,4 +18,4 @@ target_compile_options (_poco_util
-Wno-zero-as-null-pointer-constant
)
target_include_directories (_poco_util SYSTEM PUBLIC "include")
target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML)
target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML Poco::Net)

View File

@@ -241,6 +241,20 @@ namespace Util
/// If the value contains references to other properties (${<property>}), these
/// are expanded.
std::string getHost(const std::string & key) const;
/// Returns the string value of the host property with the given name.
/// Throws a NotFoundException if the key does not exist.
/// Throws a SyntaxException if the property is not a valid host (IP address or domain).
/// If the value contains references to other properties (${<property>}), these
/// are expanded.
std::string getHost(const std::string & key, const std::string & defaultValue) const;
/// If a property with the given key exists, returns the host property's string value,
/// otherwise returns the given default value.
/// Throws a SyntaxException if the property is not a valid host (IP address or domain).
/// If the value contains references to other properties (${<property>}), these
/// are expanded.
virtual void setString(const std::string & key, const std::string & value);
/// Sets the property with the given key to the given value.
/// An already existing value for the key is overwritten.
@@ -339,12 +353,35 @@ namespace Util
static bool parseBool(const std::string & value);
void setRawWithEvent(const std::string & key, std::string value);
static void checkHostValidity(const std::string & value);
/// Throws a SyntaxException if the value is not a valid host (IP address or domain).
virtual ~AbstractConfiguration();
private:
std::string internalExpand(const std::string & value) const;
std::string uncheckedExpand(const std::string & value) const;
static bool isValidIPv4Address(const std::string & value);
/// An IPv4 address is considered valid if it is "0.0.0.0" or one of those
/// accepted by inet_aton() or inet_addr().
static bool isValidIPv6Address(const std::string & value);
/// An IPv6 address is considered valid if it is "::" or one of those
/// accepted by inet_pton() with the AF_INET6 family
/// (in that case it may carry a scope id and may be surrounded by '[', ']').
static bool isValidDomainName(const std::string & value);
/// <domain> ::= <subdomain> [ "." ]
/// <subdomain> ::= <label> | <subdomain> "." <label>
/// <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
/// <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
/// <let-dig-hyp> ::= <let-dig> | "-"
/// <let-dig> ::= <letter> | <digit>
/// <letter> ::= any one of the 52 alphabetic characters A through Z in
/// upper case and a through z in lower case
/// <digit> ::= any one of the ten digits 0 through 9
AbstractConfiguration(const AbstractConfiguration &);
AbstractConfiguration & operator=(const AbstractConfiguration &);
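
A minimal sketch of how the new `getHost` accessors behave, modeled on the unit test added later in this commit; the XML content and key names here are illustrative only:

```cpp
#include <iostream>
#include <Poco/AutoPtr.h>
#include <Poco/Exception.h>
#include <Poco/DOM/DOMParser.h>
#include <Poco/DOM/Document.h>
#include <Poco/Util/XMLConfiguration.h>

int main()
{
    Poco::XML::DOMParser parser;
    Poco::AutoPtr<Poco::XML::Document> document = parser.parseString(
        "<clickhouse>"
        "  <listen_host>127.0.0.1</listen_host>"
        "  <bad_host>exa_mple.com</bad_host>"
        "</clickhouse>");
    Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);

    // A valid host is returned as-is after validation.
    std::cout << config->getHost("listen_host") << '\n';        // 127.0.0.1

    // A missing key falls back to the default, which is validated as well.
    std::cout << config->getHost("missing_key", "::1") << '\n'; // ::1

    try
    {
        config->getHost("bad_host"); // '_' is not allowed in a domain label
    }
    catch (const Poco::SyntaxException & e)
    {
        std::cout << "rejected: " << e.displayText() << '\n';
    }
}
```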

View File

@@ -18,6 +18,7 @@
#include "Poco/NumberParser.h"
#include "Poco/NumberFormatter.h"
#include "Poco/String.h"
#include "Poco/Net/IPAddressImpl.h"
using Poco::Mutex;
@@ -263,6 +264,41 @@ bool AbstractConfiguration::getBool(const std::string& key, bool defaultValue) c
}
std::string AbstractConfiguration::getHost(const std::string& key) const
{
Mutex::ScopedLock lock(_mutex);
std::string value;
if (getRaw(key, value))
{
std::string expandedValue = internalExpand(value);
checkHostValidity(expandedValue);
return expandedValue;
}
else
throw NotFoundException(key);
}
std::string AbstractConfiguration::getHost(const std::string& key, const std::string& defaultValue) const
{
Mutex::ScopedLock lock(_mutex);
std::string value;
if (getRaw(key, value))
{
std::string expandedValue = internalExpand(value);
checkHostValidity(expandedValue);
return expandedValue;
}
else
{
checkHostValidity(defaultValue);
return defaultValue;
}
}
void AbstractConfiguration::setString(const std::string& key, const std::string& value)
{
setRawWithEvent(key, value);
@@ -529,4 +565,68 @@ void AbstractConfiguration::setRawWithEvent(const std::string& key, std::string
}
void AbstractConfiguration::checkHostValidity(const std::string& value)
{
if (!isValidIPv4Address(value) && !isValidIPv6Address(value) && !isValidDomainName(value))
{
throw SyntaxException("Property is not a valid host name", value);
}
}
bool AbstractConfiguration::isValidIPv4Address(const std::string& value)
{
using Poco::Net::Impl::IPv4AddressImpl;
IPv4AddressImpl empty4 = IPv4AddressImpl();
IPv4AddressImpl ipAddress = IPv4AddressImpl::parse(value);
return ipAddress != empty4 || value == "0.0.0.0";
}
bool AbstractConfiguration::isValidIPv6Address(const std::string& value)
{
#if defined(POCO_HAVE_IPv6)
using Poco::Net::Impl::IPv6AddressImpl;
IPv6AddressImpl empty6 = IPv6AddressImpl();
IPv6AddressImpl ipAddress = IPv6AddressImpl::parse(value);
return ipAddress != empty6 || value == "::";
#else
return false;
#endif
}
bool AbstractConfiguration::isValidDomainName(const std::string& value)
{
if (value.empty() || value == "." || value.length() > 253)
return false;
int labelLength = 0;
char oldChar = 0;
for (char ch : value)
{
if (ch == '.')
{
if (labelLength == 0 || labelLength > 63 || oldChar == '-')
return false;
labelLength = 0;
}
else if (isalnum(ch) || ch == '-')
{
if (labelLength == 0 && (ch == '-' || isdigit(ch)))
return false;
++labelLength;
}
else
{
return false;
}
oldChar = ch;
}
return oldChar == '.' || (labelLength > 0 && labelLength <= 63 && oldChar != '-');
}
} } // namespace Poco::Util
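
The accept/reject decisions for IP literals can be approximated with plain POSIX `inet_pton`, which returns 1 only on a successful parse. This is a standalone sketch for experimentation, not the Poco-based code above: `inet_aton()` (referenced in the header comments) is more permissive than `inet_pton()` (it also accepts shorthand like `127.1`), and the Poco path additionally accepts IPv6 scope ids and surrounding brackets. The empty-sentinel comparison in `isValidIPv4Address`/`isValidIPv6Address` is why `"0.0.0.0"` and `"::"` need explicit special cases: they parse to the all-zero address that equals the sentinel.

```cpp
#include <arpa/inet.h>
#include <iostream>
#include <string>

// Rough stand-ins for isValidIPv4Address/isValidIPv6Address: a string is a
// valid literal if inet_pton can parse it for the given address family.
static bool looksLikeIPv4(const std::string & s)
{
    in_addr addr{};
    return inet_pton(AF_INET, s.c_str(), &addr) == 1;
}

static bool looksLikeIPv6(const std::string & s)
{
    in6_addr addr{};
    return inet_pton(AF_INET6, s.c_str(), &addr) == 1;
}

int main()
{
    std::cout << looksLikeIPv4("0.0.0.0") << '\n';               // 1
    std::cout << looksLikeIPv4("192.168.1.256") << '\n';         // 0, octet out of range
    std::cout << looksLikeIPv6("::") << '\n';                    // 1
    std::cout << looksLikeIPv6("1200::AB00:1234::2552") << '\n'; // 0, "::" may appear only once
}
```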

View File

@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.8.2.3"
ARG VERSION="24.8.3.59"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.8.2.3"
ARG VERSION="24.8.3.59"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.8.2.3"
ARG VERSION="24.8.3.59"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off

View File

@@ -0,0 +1,50 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.8.3.59-lts (e729b9fa40e) FIXME as compared to v24.8.2.3-lts (b54f79ed323)
#### New Feature
* Backported in [#68710](https://github.com/ClickHouse/ClickHouse/issues/68710): Query cache entries can now be dropped by tag. For example, the query cache entry created by `SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'` can now be dropped by `SYSTEM DROP QUERY CACHE TAG 'abc'` (or of course just: `SYSTEM DROP QUERY CACHE` which will clear the entire query cache). [#68477](https://github.com/ClickHouse/ClickHouse/pull/68477) ([Michał Tabaszewski](https://github.com/pinsvin00)).
#### Improvement
* Backported in [#69097](https://github.com/ClickHouse/ClickHouse/issues/69097): Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68973](https://github.com/ClickHouse/ClickHouse/issues/68973): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Backported in [#68818](https://github.com/ClickHouse/ClickHouse/issues/68818): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#68893](https://github.com/ClickHouse/ClickHouse/issues/68893): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` still can make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` to work as before (no Nullable columns will be inferred) and introduces new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68721](https://github.com/ClickHouse/ClickHouse/issues/68721): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Backported in [#69029](https://github.com/ClickHouse/ClickHouse/issues/69029): Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68864](https://github.com/ClickHouse/ClickHouse/issues/68864): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68854](https://github.com/ClickHouse/ClickHouse/issues/68854): Fix possible error `DB::Exception: Block structure mismatch in joined block stream: different columns:` with new JSON column. [#68686](https://github.com/ClickHouse/ClickHouse/pull/68686) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68790](https://github.com/ClickHouse/ClickHouse/issues/68790): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#69108](https://github.com/ClickHouse/ClickHouse/issues/69108): TODO. [#68744](https://github.com/ClickHouse/ClickHouse/pull/68744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68850](https://github.com/ClickHouse/ClickHouse/issues/68850): Fix resolving dynamic subcolumns from subqueries in analyzer. [#68824](https://github.com/ClickHouse/ClickHouse/pull/68824) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68911](https://github.com/ClickHouse/ClickHouse/issues/68911): Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#69160](https://github.com/ClickHouse/ClickHouse/issues/69160): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#69072](https://github.com/ClickHouse/ClickHouse/issues/69072): Fixed writing to Materialized Views with enabled setting `optimize_functions_to_subcolumns`. [#68951](https://github.com/ClickHouse/ClickHouse/pull/68951) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#69016](https://github.com/ClickHouse/ClickHouse/issues/69016): Don't use the serializations cache in const Dynamic column methods. It could lead to use of an uninitialized value or even a race condition during aggregations. [#68953](https://github.com/ClickHouse/ClickHouse/pull/68953) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69120](https://github.com/ClickHouse/ClickHouse/issues/69120): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
#### NO CL CATEGORY
* Backported in [#68947](https://github.com/ClickHouse/ClickHouse/issues/68947):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#68704](https://github.com/ClickHouse/ClickHouse/issues/68704): Fix enumerating dynamic subcolumns. [#68582](https://github.com/ClickHouse/ClickHouse/pull/68582) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69000](https://github.com/ClickHouse/ClickHouse/issues/69000): Prioritize virtual columns in Hive partitioning. [#68606](https://github.com/ClickHouse/ClickHouse/pull/68606) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Backported in [#68799](https://github.com/ClickHouse/ClickHouse/issues/68799): CI: Disable SQLLogic job. [#68654](https://github.com/ClickHouse/ClickHouse/pull/68654) ([Max K.](https://github.com/maxknv)).
* Backported in [#68834](https://github.com/ClickHouse/ClickHouse/issues/68834): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Backported in [#68781](https://github.com/ClickHouse/ClickHouse/issues/68781): Fix flaky test 00989_parallel_parts_loading. [#68737](https://github.com/ClickHouse/ClickHouse/pull/68737) ([alesapin](https://github.com/alesapin)).
* Backported in [#68762](https://github.com/ClickHouse/ClickHouse/issues/68762): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
* Backported in [#68810](https://github.com/ClickHouse/ClickHouse/issues/68810): Try to disable rerun check if job triggered manually. [#68751](https://github.com/ClickHouse/ClickHouse/pull/68751) ([Max K.](https://github.com/maxknv)).
* Backported in [#68962](https://github.com/ClickHouse/ClickHouse/issues/68962): Fix 2477 timeout. [#68752](https://github.com/ClickHouse/ClickHouse/pull/68752) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#68977](https://github.com/ClickHouse/ClickHouse/issues/68977): Check setting use_json_alias_for_old_object_type in runtime. [#68793](https://github.com/ClickHouse/ClickHouse/pull/68793) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68852](https://github.com/ClickHouse/ClickHouse/issues/68852): Make dynamic structure selection more consistent. [#68802](https://github.com/ClickHouse/ClickHouse/pull/68802) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69052](https://github.com/ClickHouse/ClickHouse/issues/69052): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).

View File

@@ -499,7 +499,7 @@ Required parameters:
- `type` — `encrypted`. Otherwise the encrypted disk is not created.
- `disk` — Type of disk for data storage.
- `key` — The key for encryption and decryption. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). You can use the `key_hex` parameter to encode the key in hexadecimal form.
You can specify multiple keys using the `id` attribute (see example above).
You can specify multiple keys using the `id` attribute (see example below).
Optional parameters:

View File

@@ -0,0 +1,44 @@
---
slug: /en/sql-reference/aggregate-functions/reference/distinctdynamictypes
sidebar_position: 215
---
# distinctDynamicTypes
Calculates the list of distinct data types stored in a [Dynamic](../../data-types/dynamic.md) column.
**Syntax**
```sql
distinctDynamicTypes(dynamic)
```
**Arguments**
- `dynamic` — [Dynamic](../../data-types/dynamic.md) column.
**Returned Value**
- The sorted list of data type names. [Array(String)](../../data-types/array.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_dynamic;
CREATE TABLE test_dynamic(d Dynamic) ENGINE = Memory;
INSERT INTO test_dynamic VALUES (42), (NULL), ('Hello'), ([1, 2, 3]), ('2020-01-01'), (map(1, 2)), (43), ([4, 5]), (NULL), ('World'), (map(3, 4))
```
```sql
SELECT distinctDynamicTypes(d) FROM test_dynamic;
```
Result:
```reference
┌─distinctDynamicTypes(d)──────────────────────────────────────┐
│ ['Array(Int64)','Date','Int64','Map(UInt8, UInt8)','String'] │
└──────────────────────────────────────────────────────────────┘
```

View File

@@ -0,0 +1,125 @@
---
slug: /en/sql-reference/aggregate-functions/reference/distinctjsonpaths
sidebar_position: 216
---
# distinctJSONPaths
Calculates the list of distinct paths stored in a [JSON](../../data-types/newjson.md) column.
**Syntax**
```sql
distinctJSONPaths(json)
```
**Arguments**
- `json` — [JSON](../../data-types/newjson.md) column.
**Returned Value**
- The sorted list of paths. [Array(String)](../../data-types/array.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_json;
CREATE TABLE test_json(json JSON) ENGINE = Memory;
INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}')
```
```sql
SELECT distinctJSONPaths(json) FROM test_json;
```
Result:
```reference
┌─distinctJSONPaths(json)───┐
│ ['a','b','c.d.e','c.d.f'] │
└───────────────────────────┘
```
# distinctJSONPathsAndTypes
Calculates the list of distinct paths and their types stored in a [JSON](../../data-types/newjson.md) column.
**Syntax**
```sql
distinctJSONPathsAndTypes(json)
```
**Arguments**
- `json` — [JSON](../../data-types/newjson.md) column.
**Returned Value**
- The sorted map of paths and types. [Map(String, Array(String))](../../data-types/map.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_json;
CREATE TABLE test_json(json JSON) ENGINE = Memory;
INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}')
```
```sql
SELECT distinctJSONPathsAndTypes(json) FROM test_json;
```
Result:
```reference
┌─distinctJSONPathsAndTypes(json)───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ {'a':['Int64'],'b':['Array(Nullable(Int64))','String'],'c.d.e':['Date'],'c.d.f':['Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))']} │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
**Note**
If the JSON declaration contains paths with specified types, these paths will always be included in the result of the `distinctJSONPaths`/`distinctJSONPathsAndTypes` functions, even if the input data does not have values for these paths.
```sql
DROP TABLE IF EXISTS test_json;
CREATE TABLE test_json(json JSON(a UInt32)) ENGINE = Memory;
INSERT INTO test_json VALUES ('{"b" : "Hello"}'), ('{"b" : "World", "c" : [1, 2, 3]}');
```
```sql
SELECT json FROM test_json;
```
```text
┌─json──────────────────────────────────┐
│ {"a":0,"b":"Hello"} │
│ {"a":0,"b":"World","c":["1","2","3"]} │
└───────────────────────────────────────┘
```
```sql
SELECT distinctJSONPaths(json) FROM test_json;
```
```text
┌─distinctJSONPaths(json)─┐
│ ['a','b','c'] │
└─────────────────────────┘
```
```sql
SELECT distinctJSONPathsAndTypes(json) FROM test_json;
```
```text
┌─distinctJSONPathsAndTypes(json)────────────────────────────────┐
│ {'a':['UInt32'],'b':['String'],'c':['Array(Nullable(Int64))']} │
└────────────────────────────────────────────────────────────────┘
```

View File

@@ -505,7 +505,130 @@ As we can see, ClickHouse kept the most frequent paths `a`, `b` and `c` and move
## Introspection functions
There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes).
There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes), [distinctDynamicTypes](../aggregate-functions/reference/distinctdynamictypes.md), [distinctJSONPaths and distinctJSONPathsAndTypes](../aggregate-functions/reference/distinctjsonpaths.md).
**Examples**
Let's investigate the content of the [GH Archive](https://www.gharchive.org/) dataset for the date `2020-01-01`:
```sql
SELECT arrayJoin(distinctJSONPaths(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject)
```
```text
┌─arrayJoin(distinctJSONPaths(json))─────────────────────────┐
│ actor.avatar_url │
│ actor.display_login │
│ actor.gravatar_id │
│ actor.id │
│ actor.login │
│ actor.url │
│ created_at │
│ id │
│ org.avatar_url │
│ org.gravatar_id │
│ org.id │
│ org.login │
│ org.url │
│ payload.action │
│ payload.before │
│ payload.comment._links.html.href │
│ payload.comment._links.pull_request.href │
│ payload.comment._links.self.href │
│ payload.comment.author_association │
│ payload.comment.body │
│ payload.comment.commit_id │
│ payload.comment.created_at │
│ payload.comment.diff_hunk │
│ payload.comment.html_url │
│ payload.comment.id │
│ payload.comment.in_reply_to_id │
│ payload.comment.issue_url │
│ payload.comment.line │
│ payload.comment.node_id │
│ payload.comment.original_commit_id │
│ payload.comment.original_position │
│ payload.comment.path │
│ payload.comment.position │
│ payload.comment.pull_request_review_id │
...
│ payload.release.node_id │
│ payload.release.prerelease │
│ payload.release.published_at │
│ payload.release.tag_name │
│ payload.release.tarball_url │
│ payload.release.target_commitish │
│ payload.release.upload_url │
│ payload.release.url │
│ payload.release.zipball_url │
│ payload.size │
│ public │
│ repo.id │
│ repo.name │
│ repo.url │
│ type │
└─arrayJoin(distinctJSONPaths(json))─────────────────────────┘
```
```sql
SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) SETTINGS date_time_input_format='best_effort'
```
```text
┌─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┐
│ ('actor.avatar_url',['String']) │
│ ('actor.display_login',['String']) │
│ ('actor.gravatar_id',['String']) │
│ ('actor.id',['Int64']) │
│ ('actor.login',['String']) │
│ ('actor.url',['String']) │
│ ('created_at',['DateTime']) │
│ ('id',['String']) │
│ ('org.avatar_url',['String']) │
│ ('org.gravatar_id',['String']) │
│ ('org.id',['Int64']) │
│ ('org.login',['String']) │
│ ('org.url',['String']) │
│ ('payload.action',['String']) │
│ ('payload.before',['String']) │
│ ('payload.comment._links.html.href',['String']) │
│ ('payload.comment._links.pull_request.href',['String']) │
│ ('payload.comment._links.self.href',['String']) │
│ ('payload.comment.author_association',['String']) │
│ ('payload.comment.body',['String']) │
│ ('payload.comment.commit_id',['String']) │
│ ('payload.comment.created_at',['DateTime']) │
│ ('payload.comment.diff_hunk',['String']) │
│ ('payload.comment.html_url',['String']) │
│ ('payload.comment.id',['Int64']) │
│ ('payload.comment.in_reply_to_id',['Int64']) │
│ ('payload.comment.issue_url',['String']) │
│ ('payload.comment.line',['Int64']) │
│ ('payload.comment.node_id',['String']) │
│ ('payload.comment.original_commit_id',['String']) │
│ ('payload.comment.original_position',['Int64']) │
│ ('payload.comment.path',['String']) │
│ ('payload.comment.position',['Int64']) │
│ ('payload.comment.pull_request_review_id',['Int64']) │
...
│ ('payload.release.node_id',['String']) │
│ ('payload.release.prerelease',['Bool']) │
│ ('payload.release.published_at',['DateTime']) │
│ ('payload.release.tag_name',['String']) │
│ ('payload.release.tarball_url',['String']) │
│ ('payload.release.target_commitish',['String']) │
│ ('payload.release.upload_url',['String']) │
│ ('payload.release.url',['String']) │
│ ('payload.release.zipball_url',['String']) │
│ ('payload.size',['Int64']) │
│ ('public',['Bool']) │
│ ('repo.id',['Int64']) │
│ ('repo.name',['String']) │
│ ('repo.url',['String']) │
│ ('type',['String']) │
└─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┘
```
## Tips for better usage of the JSON type

View File

@@ -8,14 +8,14 @@ slug: /en/guides/developer/transactional
This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
- Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
- Isolated: concurrent clients observe a consistent snapshot of the table—the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
- Isolated: concurrent clients observe a consistent snapshot of the table—the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen. Clients inside of another transaction have [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation), while clients outside of a transaction have [read uncommitted](https://en.wikipedia.org/wiki/Isolation_(database_systems)#Read_uncommitted) isolation level.
- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views).
## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
Same as Case 1 above, with this detail:
- If table has many partitions and INSERT covers many partitions—then insertion into every partition is transactional on its own
- If table has many partitions and INSERT covers many partitions, then insertion into every partition is transactional on its own
## Case 3: INSERT into one distributed table of the MergeTree* family
@@ -38,7 +38,7 @@ Same as Case 1 above, with this detail:
- the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
- ClickHouse is using MVCC with snapshot isolation internally
- ClickHouse is using [MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control) with [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation) internally for concurrent transactions
- all ACID properties are valid even in the case of server kill/crash
- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
@@ -260,7 +260,7 @@ FROM mergetree_table
### Transactions introspection
You can inspect transactions by querying the `system.transactions` table, but note that you cannot query that
table from a session that is in a transaction—open a second `clickhouse client` session to query that table.
table from a session that is in a transaction. Open a second `clickhouse client` session to query that table.
```sql
SELECT *

View File

@@ -0,0 +1,161 @@
#include <unordered_set>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesBinaryEncoding.h>
#include <Columns/ColumnDynamic.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_LARGE_ARRAY_SIZE;
}
struct AggregateFunctionDistinctDynamicTypesData
{
constexpr static size_t MAX_ARRAY_SIZE = 0xFFFFFF;
std::unordered_set<String> data;
void add(const String & type)
{
data.insert(type);
}
void merge(const AggregateFunctionDistinctDynamicTypesData & other)
{
data.insert(other.data.begin(), other.data.end());
}
void serialize(WriteBuffer & buf) const
{
writeVarUInt(data.size(), buf);
for (const auto & type : data)
writeStringBinary(type, buf);
}
void deserialize(ReadBuffer & buf)
{
size_t size;
readVarUInt(size, buf);
if (size > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", MAX_ARRAY_SIZE, size);
data.reserve(size);
String type;
for (size_t i = 0; i != size; ++i)
{
readStringBinary(type, buf);
data.insert(type);
}
}
void insertResultInto(IColumn & column)
{
/// Insert types in sorted order for better output.
auto & array_column = assert_cast<ColumnArray &>(column);
auto & string_column = assert_cast<ColumnString &>(array_column.getData());
std::vector<String> sorted_data(data.begin(), data.end());
std::sort(sorted_data.begin(), sorted_data.end());
for (const auto & type : sorted_data)
string_column.insertData(type.data(), type.size());
array_column.getOffsets().push_back(string_column.size());
}
};
/// Calculates the list of distinct data types in a Dynamic column.
class AggregateFunctionDistinctDynamicTypes final : public IAggregateFunctionDataHelper<AggregateFunctionDistinctDynamicTypesData, AggregateFunctionDistinctDynamicTypes>
{
public:
explicit AggregateFunctionDistinctDynamicTypes(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<AggregateFunctionDistinctDynamicTypesData, AggregateFunctionDistinctDynamicTypes>(argument_types_, {}, std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()))
{
}
String getName() const override { return "distinctDynamicTypes"; }
bool allocatesMemoryInArena() const override { return false; }
void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & dynamic_column = assert_cast<const ColumnDynamic & >(*columns[0]);
if (dynamic_column.isNullAt(row_num))
return;
data(place).add(dynamic_column.getTypeNameAt(row_num));
}
void ALWAYS_INLINE addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
const override
{
if (if_argument_pos >= 0 || row_begin != 0 || row_end != columns[0]->size())
IAggregateFunctionDataHelper::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
/// Optimization for case when we add all rows from the column into single place.
/// In this case we can avoid iterating over all rows because we can get all types
/// in Dynamic column in a more efficient way.
else
assert_cast<const ColumnDynamic & >(*columns[0]).getAllTypeNamesInto(data(place).data);
}
void addManyDefaults(
AggregateDataPtr __restrict /*place*/,
const IColumn ** /*columns*/,
size_t /*length*/,
Arena * /*arena*/) const override
{
/// Default value for Dynamic is NULL, so nothing to add.
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).merge(data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
data(place).insertResultInto(to);
}
};
AggregateFunctionPtr createAggregateFunctionDistinctDynamicTypes(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
if (argument_types.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for aggregate function {}. Expected single argument with type Dynamic, got {} arguments", name, argument_types.size());
if (!isDynamic(argument_types[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type Dynamic", argument_types[0]->getName(), name);
return std::make_shared<AggregateFunctionDistinctDynamicTypes>(argument_types);
}
void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory)
{
factory.registerFunction("distinctDynamicTypes", createAggregateFunctionDistinctDynamicTypes);
}
}
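
The aggregate state above travels in a simple length-prefixed layout: a varint element count, then each string as a varint length followed by raw bytes. A standalone sketch of that wire format, assuming ClickHouse's usual 7-bit little-endian varint encoding and substituting `std::string` for `WriteBuffer`/`ReadBuffer`:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_set>

// Illustrative stand-ins for writeVarUInt/readVarUInt: 7 bits per byte,
// low bits first, high bit set on all bytes except the last.
static void putVarUInt(uint64_t x, std::string & out)
{
    while (x >= 0x80)
    {
        out.push_back(static_cast<char>(x | 0x80));
        x >>= 7;
    }
    out.push_back(static_cast<char>(x));
}

static uint64_t getVarUInt(const std::string & in, size_t & pos)
{
    uint64_t x = 0;
    for (int shift = 0;; shift += 7)
    {
        auto b = static_cast<uint8_t>(in[pos++]);
        x |= static_cast<uint64_t>(b & 0x7F) << shift;
        if (!(b & 0x80))
            return x;
    }
}

int main()
{
    const std::unordered_set<std::string> types = {"Int64", "String", "Array(Int64)"};

    // serialize(): element count, then length-prefixed strings (cf. writeStringBinary).
    std::string buf;
    putVarUInt(types.size(), buf);
    for (const auto & type : types)
    {
        putVarUInt(type.size(), buf);
        buf += type;
    }

    // deserialize(): read the count, then each string back.
    std::unordered_set<std::string> restored;
    size_t pos = 0;
    const uint64_t size = getVarUInt(buf, pos);
    for (uint64_t i = 0; i != size; ++i)
    {
        const uint64_t len = getVarUInt(buf, pos);
        restored.insert(buf.substr(pos, len));
        pos += len;
    }

    std::cout << (restored == types) << '\n'; // 1
}
```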

View File

@@ -0,0 +1,350 @@
#include <unordered_set>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypesBinaryEncoding.h>
#include <Columns/ColumnDynamic.h>
#include <Columns/ColumnObject.h>
#include <Columns/ColumnMap.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_LARGE_ARRAY_SIZE;
}
constexpr static size_t DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE = 0xFFFFFF;
struct AggregateFunctionDistinctJSONPathsData
{
static constexpr auto name = "distinctJSONPaths";
std::unordered_set<String> data;
void add(const ColumnObject & column, size_t row_num, const std::unordered_map<String, String> &)
{
for (const auto & [path, _] : column.getTypedPaths())
data.insert(path);
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
{
/// Add path from dynamic paths only if it's not NULL in this row.
if (!dynamic_column->isNullAt(row_num))
data.insert(path);
}
/// Iterate over paths in shared data in this row.
const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues();
const auto & shared_data_offsets = column.getSharedDataOffsets();
const size_t start = shared_data_offsets[static_cast<ssize_t>(row_num) - 1];
const size_t end = shared_data_offsets[static_cast<ssize_t>(row_num)];
for (size_t i = start; i != end; ++i)
data.insert(shared_data_paths->getDataAt(i).toString());
}
void addWholeColumn(const ColumnObject & column, const std::unordered_map<String, String> &)
{
for (const auto & [path, _] : column.getTypedPaths())
data.insert(path);
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
{
/// Add dynamic path only if it has at least one non-null value.
/// getNumberOfDefaultRows for Dynamic column is O(1).
if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size())
data.insert(path);
}
/// Iterate over all paths in shared data.
const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues();
for (size_t i = 0; i != shared_data_paths->size(); ++i)
data.insert(shared_data_paths->getDataAt(i).toString());
}
void merge(const AggregateFunctionDistinctJSONPathsData & other)
{
data.insert(other.data.begin(), other.data.end());
}
void serialize(WriteBuffer & buf) const
{
writeVarUInt(data.size(), buf);
for (const auto & path : data)
writeStringBinary(path, buf);
}
void deserialize(ReadBuffer & buf)
{
size_t size;
readVarUInt(size, buf);
if (size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, size);
String path;
for (size_t i = 0; i != size; ++i)
{
readStringBinary(path, buf);
data.insert(path);
}
}
void insertResultInto(IColumn & column)
{
/// Insert paths in sorted order for better output.
auto & array_column = assert_cast<ColumnArray &>(column);
auto & string_column = assert_cast<ColumnString &>(array_column.getData());
std::vector<String> sorted_data(data.begin(), data.end());
std::sort(sorted_data.begin(), sorted_data.end());
for (const auto & path : sorted_data)
string_column.insertData(path.data(), path.size());
array_column.getOffsets().push_back(string_column.size());
}
static DataTypePtr getResultType()
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
};
struct AggregateFunctionDistinctJSONPathsAndTypesData
{
static constexpr auto name = "distinctJSONPathsAndTypes";
std::unordered_map<String, std::unordered_set<String>> data;
void add(const ColumnObject & column, size_t row_num, const std::unordered_map<String, String> & typed_paths_type_names)
{
for (const auto & [path, _] : column.getTypedPaths())
data[path].insert(typed_paths_type_names.at(path));
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
{
if (!dynamic_column->isNullAt(row_num))
data[path].insert(dynamic_column->getTypeNameAt(row_num));
}
/// Iterate over paths in shared data in this row and decode the data types.
const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues();
const auto & shared_data_offsets = column.getSharedDataOffsets();
const size_t start = shared_data_offsets[static_cast<ssize_t>(row_num) - 1];
const size_t end = shared_data_offsets[static_cast<ssize_t>(row_num)];
for (size_t i = start; i != end; ++i)
{
auto path = shared_data_paths->getDataAt(i).toString();
auto value = shared_data_values->getDataAt(i);
ReadBufferFromMemory buf(value.data, value.size);
auto type = decodeDataType(buf);
/// We should not have Nulls here but let's check just in case.
chassert(!isNothing(type));
data[path].insert(type->getName());
}
}
void addWholeColumn(const ColumnObject & column, const std::unordered_map<String, String> & typed_paths_type_names)
{
for (const auto & [path, _] : column.getTypedPaths())
data[path].insert(typed_paths_type_names.at(path));
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
{
/// Add dynamic path only if it has at least one non-null value.
/// getNumberOfDefaultRows for Dynamic column is O(1).
if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size())
dynamic_column->getAllTypeNamesInto(data[path]);
}
/// Iterate over all paths in shared data and decode the data types.
const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues();
for (size_t i = 0; i != shared_data_paths->size(); ++i)
{
auto path = shared_data_paths->getDataAt(i).toString();
auto value = shared_data_values->getDataAt(i);
ReadBufferFromMemory buf(value.data, value.size);
auto type = decodeDataType(buf);
/// We should not have Nulls here but let's check just in case.
chassert(!isNothing(type));
data[path].insert(type->getName());
}
}
void merge(const AggregateFunctionDistinctJSONPathsAndTypesData & other)
{
for (const auto & [path, types] : other.data)
data[path].insert(types.begin(), types.end());
}
void serialize(WriteBuffer & buf) const
{
writeVarUInt(data.size(), buf);
for (const auto & [path, types] : data)
{
writeStringBinary(path, buf);
writeVarUInt(types.size(), buf);
for (const auto & type : types)
writeStringBinary(type, buf);
}
}
void deserialize(ReadBuffer & buf)
{
size_t paths_size, types_size;
readVarUInt(paths_size, buf);
if (paths_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for paths (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, paths_size);
data.reserve(paths_size);
String path, type;
for (size_t i = 0; i != paths_size; ++i)
{
readStringBinary(path, buf);
readVarUInt(types_size, buf);
if (types_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for types (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, types_size);
data[path].reserve(types_size);
for (size_t j = 0; j != types_size; ++j)
{
readStringBinary(type, buf);
data[path].insert(type);
}
}
}
void insertResultInto(IColumn & column)
{
/// Insert sorted paths and types for better output.
auto & array_column = assert_cast<ColumnMap &>(column).getNestedColumn();
auto & tuple_column = assert_cast<ColumnTuple &>(array_column.getData());
auto & key_column = assert_cast<ColumnString &>(tuple_column.getColumn(0));
auto & value_column = assert_cast<ColumnArray &>(tuple_column.getColumn(1));
auto & value_column_data = assert_cast<ColumnString &>(value_column.getData());
std::vector<std::pair<String, std::vector<String>>> sorted_data;
sorted_data.reserve(data.size());
for (const auto & [path, types] : data)
{
std::vector<String> sorted_types(types.begin(), types.end());
std::sort(sorted_types.begin(), sorted_types.end());
sorted_data.emplace_back(path, std::move(sorted_types));
}
std::sort(sorted_data.begin(), sorted_data.end());
for (const auto & [path, types] : sorted_data)
{
key_column.insertData(path.data(), path.size());
for (const auto & type : types)
value_column_data.insertData(type.data(), type.size());
value_column.getOffsets().push_back(value_column_data.size());
}
array_column.getOffsets().push_back(key_column.size());
}
static DataTypePtr getResultType()
{
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
}
};
/// Calculates the list of distinct paths or pairs (path, type) in a JSON column.
template <typename Data>
class AggregateFunctionDistinctJSONPathsAndTypes final : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>
{
public:
explicit AggregateFunctionDistinctJSONPathsAndTypes(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>(
argument_types_, {}, Data::getResultType())
{
const auto & typed_paths_types = assert_cast<const DataTypeObject &>(*argument_types_[0]).getTypedPaths();
typed_paths_type_names.reserve(typed_paths_types.size());
for (const auto & [path, type] : typed_paths_types)
typed_paths_type_names[path] = type->getName();
}
String getName() const override { return Data::name; }
bool allocatesMemoryInArena() const override { return false; }
void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & object_column = assert_cast<const ColumnObject & >(*columns[0]);
this->data(place).add(object_column, row_num, typed_paths_type_names);
}
void ALWAYS_INLINE addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
const override
{
if (if_argument_pos >= 0 || row_begin != 0 || row_end != columns[0]->size())
IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
/// Optimization for case when we add all rows from the column into single place.
/// In this case we can avoid iterating over all rows because we can get all paths
/// and types in JSON column in a more efficient way.
else
this->data(place).addWholeColumn(assert_cast<const ColumnObject & >(*columns[0]), typed_paths_type_names);
}
void addManyDefaults(
AggregateDataPtr __restrict /*place*/,
const IColumn ** /*columns*/,
size_t /*length*/,
Arena * /*arena*/) const override
{
/// Default value for JSON is empty object, so nothing to add.
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
this->data(place).insertResultInto(to);
}
private:
std::unordered_map<String, String> typed_paths_type_names;
};
template <typename Data>
AggregateFunctionPtr createAggregateFunctionDistinctJSONPathsAndTypes(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
if (argument_types.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for aggregate function {}. Expected single argument with type JSON, got {} arguments", name, argument_types.size());
if (!isObject(argument_types[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type JSON", argument_types[0]->getName(), name);
return std::make_shared<AggregateFunctionDistinctJSONPathsAndTypes<Data>>(argument_types);
}
void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory)
{
factory.registerFunction("distinctJSONPaths", createAggregateFunctionDistinctJSONPathsAndTypes<AggregateFunctionDistinctJSONPathsData>);
factory.registerFunction("distinctJSONPathsAndTypes", createAggregateFunctionDistinctJSONPathsAndTypes<AggregateFunctionDistinctJSONPathsAndTypesData>);
}
}

View File

@@ -89,6 +89,8 @@ void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory);
void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory);
void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@@ -191,6 +193,8 @@ void registerAggregateFunctions()
registerAggregateFunctionFlameGraph(factory);
registerAggregateFunctionKolmogorovSmirnovTest(factory);
registerAggregateFunctionLargestTriangleThreeBuckets(factory);
registerAggregateFunctionDistinctDynamicTypes(factory);
registerAggregateFunctionDistinctJSONPathsAndTypes(factory);
registerWindowFunctions(factory);
}

View File

@@ -2699,14 +2699,6 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
ReadBufferFromFile in(file_name);
readStringUntilEOF(queries_from_file, in);
if (!getClientConfiguration().has("log_comment"))
{
Settings settings = client_context->getSettingsCopy();
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
settings.log_comment = fs::absolute(fs::path(file_name));
client_context->setSettings(settings);
}
return executeMultiQuery(queries_from_file);
}

View File

@@ -979,6 +979,41 @@ ColumnPtr ColumnDynamic::compress() const
});
}
String ColumnDynamic::getTypeNameAt(size_t row_num) const
{
const auto & variant_col = getVariantColumn();
const size_t discr = variant_col.globalDiscriminatorAt(row_num);
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
return "";
if (discr == getSharedVariantDiscriminator())
{
const auto value = getSharedVariant().getDataAt(variant_col.offsetAt(row_num));
ReadBufferFromMemory buf(value.data, value.size);
return decodeDataType(buf)->getName();
}
return variant_info.variant_names[discr];
}
void ColumnDynamic::getAllTypeNamesInto(std::unordered_set<String> & names) const
{
auto shared_variant_discr = getSharedVariantDiscriminator();
for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
{
if (i != shared_variant_discr && !variant_column_ptr->getVariantByGlobalDiscriminator(i).empty())
names.insert(variant_info.variant_names[i]);
}
const auto & shared_variant = getSharedVariant();
for (size_t i = 0; i != shared_variant.size(); ++i)
{
const auto value = shared_variant.getDataAt(i);
ReadBufferFromMemory buf(value.data, value.size);
names.insert(decodeDataType(buf)->getName());
}
}
void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
{
if (source_columns.empty())

View File

@@ -430,6 +430,9 @@ public:
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) { return getVariantSerialization(variant_type, variant_type->getName()); }
String getTypeNameAt(size_t row_num) const;
void getAllTypeNamesInto(std::unordered_set<String> & names) const;
private:
void createVariantInfo(const DataTypePtr & variant_type);

View File

@@ -0,0 +1,69 @@
#include <Poco/AutoPtr.h>
#include <Poco/DOM/DOMParser.h>
#include <Poco/Util/XMLConfiguration.h>
#include <gtest/gtest.h>
TEST(Common, ConfigHostValidation)
{
std::string xml(R"CONFIG(<clickhouse>
<IPv4_1>0.0.0.0</IPv4_1>
<IPv4_2>192.168.0.1</IPv4_2>
<IPv4_3>127.0.0.1</IPv4_3>
<IPv4_4>255.255.255.255</IPv4_4>
<IPv6_1>2001:0db8:85a3:0000:0000:8a2e:0370:7334</IPv6_1>
<IPv6_2>2001:DB8::8a2e:370:7334</IPv6_2>
<IPv6_3>::1</IPv6_3>
<IPv6_4>::</IPv6_4>
<Domain_1>www.example.com.</Domain_1>
<Domain_2>a.co</Domain_2>
<Domain_3>localhost</Domain_3>
<Domain_4>xn--fiqs8s.xn--fiqz9s</Domain_4>
<IPv4_Invalid_1>192.168.1.256</IPv4_Invalid_1>
<IPv4_Invalid_2>192.168.1.1.1</IPv4_Invalid_2>
<IPv4_Invalid_3>192.168.1.99999999999999999999</IPv4_Invalid_3>
<IPv4_Invalid_4>192.168.1.a</IPv4_Invalid_4>
<IPv6_Invalid_1>2001:0db8:85a3:::8a2e:0370:7334</IPv6_Invalid_1>
<IPv6_Invalid_2>1200::AB00:1234::2552:7777:1313</IPv6_Invalid_2>
<IPv6_Invalid_3>1200::AB00:1234:Q000:2552:7777:1313</IPv6_Invalid_3>
<IPv6_Invalid_4>1200:AB00:1234:2552:7777:1313:FFFF</IPv6_Invalid_4>
<Domain_Invalid_1>example.com..</Domain_Invalid_1>
<Domain_Invalid_2>5example.com</Domain_Invalid_2>
<Domain_Invalid_3>example.com-</Domain_Invalid_3>
<Domain_Invalid_4>exa_mple.com</Domain_Invalid_4>
</clickhouse>)CONFIG");
Poco::XML::DOMParser dom_parser;
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
EXPECT_NO_THROW(config->getHost("IPv4_1"));
EXPECT_NO_THROW(config->getHost("IPv4_2"));
EXPECT_NO_THROW(config->getHost("IPv4_3"));
EXPECT_NO_THROW(config->getHost("IPv4_4"));
EXPECT_NO_THROW(config->getHost("IPv6_1"));
EXPECT_NO_THROW(config->getHost("IPv6_2"));
EXPECT_NO_THROW(config->getHost("IPv6_3"));
EXPECT_NO_THROW(config->getHost("IPv6_4"));
EXPECT_NO_THROW(config->getHost("Domain_1"));
EXPECT_NO_THROW(config->getHost("Domain_2"));
EXPECT_NO_THROW(config->getHost("Domain_3"));
EXPECT_NO_THROW(config->getHost("Domain_4"));
EXPECT_THROW(config->getHost("IPv4_Invalid_1"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv4_Invalid_2"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv4_Invalid_3"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv4_Invalid_4"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv6_Invalid_1"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv6_Invalid_2"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv6_Invalid_3"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("IPv6_Invalid_4"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("Domain_Invalid_1"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("Domain_Invalid_2"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("Domain_Invalid_3"), Poco::SyntaxException);
EXPECT_THROW(config->getHost("Domain_Invalid_4"), Poco::SyntaxException);
}

View File

@@ -17,11 +17,12 @@
#include <Core/ExternalTable.h>
#include <Core/Settings.h>
#include <Poco/Net/MessageHeader.h>
#include <Parsers/ASTNameTypePair.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <base/scope_guard.h>
#include <Poco/Net/MessageHeader.h>
namespace DB
@@ -85,7 +86,15 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg
/// We use `formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction.
/// `getColumnNameWithoutAlias` would return the name of the function with `(arguments)` even if the argument list is empty.
if (column)
structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false));
structure.emplace_back(
column->name,
column->type->formatWithPossiblyHidingSensitiveData(
/*max_length=*/0,
/*one_line=*/true,
/*show_secrets=*/true,
/*print_pretty_type_names=*/false,
/*always_quote_identifiers=*/false,
/*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks));
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage());
}
@@ -102,7 +111,15 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error);
for (size_t i = 0; i < type_list_raw->children.size(); ++i)
structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false));
structure.emplace_back(
"_" + toString(i + 1),
type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(
/*max_length=*/0,
/*one_line=*/true,
/*show_secrets=*/true,
/*print_pretty_type_names=*/false,
/*always_quote_identifiers=*/false,
/*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks));
}
void BaseExternalTable::initSampleBlock()

View File

@@ -1297,6 +1297,9 @@ class IColumn;
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", "Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'.", 0) \
M(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, "Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple", 0) \
\
M(Bool, output_format_always_quote_identifiers, false, "Always quote identifiers", 0) \
M(IdentifierQuotingStyle, output_format_identifier_quoting_style, IdentifierQuotingStyle::Backticks, "Set the quoting style for identifiers", 0) \
// End of FORMAT_FACTORY_SETTINGS

View File

@@ -75,6 +75,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistor
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"create_if_not_exists", false, false, "New setting."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"output_format_always_quote_identifiers", false, false, "New setting."},
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."}
}
},
{"24.8",

View File

@ -244,4 +244,10 @@ IMPLEMENT_SETTING_ENUM(
GroupArrayActionWhenLimitReached,
ErrorCodes::BAD_ARGUMENTS,
{{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}})
IMPLEMENT_SETTING_ENUM(IdentifierQuotingStyle, ErrorCodes::BAD_ARGUMENTS,
{{"None", IdentifierQuotingStyle::None},
{"Backticks", IdentifierQuotingStyle::Backticks},
{"DoubleQuotes", IdentifierQuotingStyle::DoubleQuotes},
{"BackticksMySQL", IdentifierQuotingStyle::BackticksMySQL}})
}

View File

@ -10,6 +10,7 @@
#include <Formats/FormatSettings.h>
#include <IO/ReadSettings.h>
#include <Parsers/ASTSQLSecurity.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <QueryPipeline/SizeLimits.h>
#include <Common/ShellCommandSettings.h>
@ -351,6 +352,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateT
DECLARE_SETTING_ENUM(SQLSecurityType)
DECLARE_SETTING_ENUM(IdentifierQuotingStyle)
enum class GroupArrayActionWhenLimitReached : uint8_t
{
THROW,

View File

@ -396,7 +396,7 @@ const ActionsDAG::Node * ActionsDAG::tryFindInOutputs(const std::string & name)
return nullptr;
}
ActionsDAG::NodeRawConstPtrs ActionsDAG::findInOutpus(const Names & names) const
ActionsDAG::NodeRawConstPtrs ActionsDAG::findInOutputs(const Names & names) const
{
NodeRawConstPtrs required_nodes;
required_nodes.reserve(names.size());
@ -524,7 +524,7 @@ void ActionsDAG::removeUnusedActions(const NameSet & required_names, bool allow_
void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_remove_inputs, bool allow_constant_folding)
{
auto required_nodes = findInOutpus(required_names);
auto required_nodes = findInOutputs(required_names);
outputs.swap(required_nodes);
removeUnusedActions(allow_remove_inputs, allow_constant_folding);
}

View File

@ -156,7 +156,7 @@ public:
const Node * tryFindInOutputs(const std::string & name) const;
/// Same, but for the list of names.
NodeRawConstPtrs findInOutpus(const Names & names) const;
NodeRawConstPtrs findInOutputs(const Names & names) const;
/// Find first node with the same name in output nodes and replace it.
/// If was not found, add node to outputs end.
@ -436,7 +436,7 @@ public:
/// Returns a list of nodes representing atomic predicates.
static NodeRawConstPtrs extractConjunctionAtoms(const Node * predicate);
/// Get a list of nodes. For every node, check if it can be compused using allowed subset of inputs.
/// Get a list of nodes. For every node, check if it can be computed using allowed subset of inputs.
/// Returns only those nodes from the list which can be computed.
static NodeRawConstPtrs filterNodesByAllowedInputs(
NodeRawConstPtrs nodes,

View File

@ -33,6 +33,8 @@
#include <Common/SensitiveDataMasker.h>
#include <Common/SipHash.h>
#include <Common/logger_useful.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
namespace CurrentMetrics
{
@ -308,6 +310,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
auto table = interpreter.getTable(insert_query);
auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);
@ -318,6 +321,10 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
/// InterpreterInsertQuery::getTable() -> ITableFunction::execute().
if (insert_query.table_id)
query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames());
insert_query.columns = std::make_shared<ASTExpressionList>();
for (const auto & column : sample_block)
insert_query.columns->children.push_back(std::make_shared<ASTIdentifier>(column.name));
}
AsynchronousInsertQueue::PushResult
@ -696,6 +703,17 @@ catch (...)
tryLogCurrentException("AsynchronousInsertQueue", "Failed to add elements to AsynchronousInsertLog");
}
void convertBlockToHeader(Block & block, const Block & header)
{
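/// Build a converting DAG that maps the block's columns to the header's layout by name, then execute it to rewrite the block in place.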
auto converting_dag = ActionsDAG::makeConvertingActions(
block.getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name);
auto converting_actions = std::make_shared<ExpressionActions>(std::move(converting_dag));
converting_actions->execute(block);
}
String serializeQuery(const IAST & query, size_t max_length)
{
return query.hasSecretParts()
@ -791,6 +809,61 @@ try
if (async_insert_log)
log_elements.reserve(data->entries.size());
auto add_entry_to_asynchronous_insert_log = [&, query_by_format = NameToNameMap{}](
const InsertData::EntryPtr & entry,
const String & parsing_exception,
size_t num_rows,
size_t num_bytes) mutable
{
if (!async_insert_log)
return;
AsynchronousInsertLogElement elem;
elem.event_time = timeInSeconds(entry->create_time);
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
elem.database = query_database;
elem.table = query_table;
elem.format = entry->format;
elem.query_id = entry->query_id;
elem.bytes = num_bytes;
elem.rows = num_rows;
elem.exception = parsing_exception;
elem.data_kind = entry->chunk.getDataKind();
elem.timeout_milliseconds = data->timeout_ms.count();
elem.flush_query_id = insert_query_id;
auto get_query_by_format = [&](const String & format) -> const String &
{
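/// Serialize the query for a given format at most once per flush; try_emplace returns the cached entry on later calls.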
auto [it, inserted] = query_by_format.try_emplace(format);
if (!inserted)
return it->second;
auto query = key.query->clone();
assert_cast<ASTInsertQuery &>(*query).format = format;
it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
return it->second;
};
if (entry->chunk.getDataKind() == DataKind::Parsed)
elem.query_for_logging = key.query_str;
else
elem.query_for_logging = get_query_by_format(entry->format);
/// If there was a parsing error,
/// the entry won't be flushed anyway,
/// so add the log element immediately.
if (!elem.exception.empty())
{
elem.status = AsynchronousInsertLogElement::ParsingError;
async_insert_log->add(std::move(elem));
}
else
{
elem.status = AsynchronousInsertLogElement::Ok;
log_elements.push_back(std::move(elem));
}
};
try
{
interpreter = std::make_unique<InterpreterInsertQuery>(
@ -819,49 +892,20 @@ try
catch (...)
{
logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds());
if (async_insert_log)
{
for (const auto & entry : data->entries)
add_entry_to_asynchronous_insert_log(entry, /*parsing_exception=*/ "", /*num_rows=*/ 0, entry->chunk.byteSize());
auto exception = getCurrentExceptionMessage(false);
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, exception);
}
throw;
}
auto add_entry_to_asynchronous_insert_log = [&](const auto & entry,
const auto & entry_query_for_logging,
const auto & exception,
size_t num_rows,
size_t num_bytes,
Milliseconds timeout_ms)
{
if (!async_insert_log)
return;
AsynchronousInsertLogElement elem;
elem.event_time = timeInSeconds(entry->create_time);
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
elem.query_for_logging = entry_query_for_logging;
elem.database = query_database;
elem.table = query_table;
elem.format = entry->format;
elem.query_id = entry->query_id;
elem.bytes = num_bytes;
elem.rows = num_rows;
elem.exception = exception;
elem.data_kind = entry->chunk.getDataKind();
elem.timeout_milliseconds = timeout_ms.count();
elem.flush_query_id = insert_query_id;
/// If there was a parsing error,
/// the entry won't be flushed anyway,
/// so add the log element immediately.
if (!elem.exception.empty())
{
elem.status = AsynchronousInsertLogElement::ParsingError;
async_insert_log->add(std::move(elem));
}
else
{
log_elements.push_back(elem);
}
};
auto finish_entries = [&]
auto finish_entries = [&](size_t num_rows, size_t num_bytes)
{
for (const auto & entry : data->entries)
{
@ -874,20 +918,7 @@ try
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, "");
}
};
Chunk chunk;
auto header = pipeline.getHeader();
if (key.data_kind == DataKind::Parsed)
chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
else
chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_asynchronous_insert_log);
ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());
auto log_and_add_finish_to_query_log = [&](size_t num_rows, size_t num_bytes)
{
LOG_DEBUG(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
queue_shard_flush_time_history.updateWithCurrentTime();
@ -896,16 +927,24 @@ try
query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
};
if (chunk.getNumRows() == 0)
{
finish_entries();
log_and_add_finish_to_query_log(0, 0);
return;
}
try
{
Chunk chunk;
auto header = pipeline.getHeader();
if (key.data_kind == DataKind::Parsed)
chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
else
chunk = processPreprocessedEntries(data, header, add_entry_to_asynchronous_insert_log);
ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());
if (chunk.getNumRows() == 0)
{
finish_entries(/*num_rows=*/ 0, /*num_bytes=*/ 0);
return;
}
size_t num_rows = chunk.getNumRows();
size_t num_bytes = chunk.bytes();
@ -915,7 +954,7 @@ try
CompletedPipelineExecutor completed_executor(pipeline);
completed_executor.execute();
log_and_add_finish_to_query_log(num_rows, num_bytes);
finish_entries(num_rows, num_bytes);
}
catch (...)
{
@ -929,8 +968,6 @@ try
}
throw;
}
finish_entries();
}
catch (const Exception & e)
{
@ -991,7 +1028,6 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
auto chunk_info = std::make_shared<AsyncInsertInfo>();
auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);
for (const auto & entry : data->entries)
{
@ -1009,7 +1045,8 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
size_t num_rows = executor.execute(*buffer);
total_rows += num_rows;
/// for some reason, client can pass zero rows and bytes to server.
/// For some reason, client can pass zero rows and bytes to server.
/// We don't update offsets in this case, because we assume every insert has some rows during dedup
/// but we have nothing to deduplicate for this insert.
if (num_rows > 0)
@ -1018,8 +1055,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
chunk_info->tokens.push_back(entry->async_dedup_token);
}
add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms);
add_to_async_insert_log(entry, current_exception, num_rows, num_bytes);
current_exception.clear();
entry->resetChunk();
}
@ -1031,30 +1067,14 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
template <typename LogFunc>
Chunk AsynchronousInsertQueue::processPreprocessedEntries(
const InsertQuery & key,
const InsertDataPtr & data,
const Block & header,
const ContextPtr & insert_context,
LogFunc && add_to_async_insert_log)
{
size_t total_rows = 0;
auto chunk_info = std::make_shared<AsyncInsertInfo>();
auto result_columns = header.cloneEmptyColumns();
std::unordered_map<String, String> format_to_query;
auto get_query_by_format = [&](const String & format) -> const String &
{
auto [it, inserted] = format_to_query.try_emplace(format);
if (!inserted)
return it->second;
auto query = key.query->clone();
assert_cast<ASTInsertQuery &>(*query).format = format;
it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
return it->second;
};
for (const auto & entry : data->entries)
{
const auto * block = entry->chunk.asBlock();
@ -1062,23 +1082,26 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected entry with data kind Preprocessed. Got: {}", entry->chunk.getDataKind());
auto columns = block->getColumns();
Block block_to_insert = *block;
if (!isCompatibleHeader(block_to_insert, header))
convertBlockToHeader(block_to_insert, header);
auto columns = block_to_insert.getColumns();
for (size_t i = 0, s = columns.size(); i < s; ++i)
result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size());
total_rows += block->rows();
/// for some reason, client can pass zero rows and bytes to server.
total_rows += block_to_insert.rows();
/// For some reason, client can pass zero rows and bytes to server.
/// We don't update offsets in this case, because we assume every insert has some rows during dedup,
/// but we have nothing to deduplicate for this insert.
if (block->rows())
if (block_to_insert.rows() > 0)
{
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
}
const auto & query_for_logging = get_query_by_format(entry->format);
add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms);
add_to_async_insert_log(entry, /*parsing_exception=*/ "", block_to_insert.rows(), block_to_insert.bytes());
entry->resetChunk();
}

View File

@ -288,10 +288,8 @@ private:
template <typename LogFunc>
static Chunk processPreprocessedEntries(
const InsertQuery & key,
const InsertDataPtr & data,
const Block & header,
const ContextPtr & insert_context,
LogFunc && add_to_async_insert_log);
template <typename E>

View File

@ -30,6 +30,8 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription()
{"id", std::make_shared<DataTypeUInt64>(), "ID of processor."},
{"parent_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "Parent processors IDs."},
{"plan_step", std::make_shared<DataTypeUInt64>(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."},
{"plan_step_name", std::make_shared<DataTypeString>(), "Name of the query plan step which created this processor. The value is empty if the processor was not added from any step."},
{"plan_step_description", std::make_shared<DataTypeString>(), "Description of the query plan step which created this processor. The value is empty if the processor was not added from any step."},
{"plan_group", std::make_shared<DataTypeUInt64>(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result."},
{"initial_query_id", std::make_shared<DataTypeString>(), "ID of the initial query (for distributed query execution)."},
@ -64,6 +66,8 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const
}
columns[i++]->insert(plan_step);
columns[i++]->insert(plan_step_name);
columns[i++]->insert(plan_step_description);
columns[i++]->insert(plan_group);
columns[i++]->insertData(initial_query_id.data(), initial_query_id.size());
columns[i++]->insertData(query_id.data(), query_id.size());

View File

@ -19,6 +19,8 @@ struct ProcessorProfileLogElement
UInt64 plan_step{};
UInt64 plan_group{};
String plan_step_name;
String plan_step_description;
String initial_query_id;
String query_id;

View File

@ -478,6 +478,8 @@ void logQueryFinish(
processor_elem.parent_ids = std::move(parents);
processor_elem.plan_step = reinterpret_cast<std::uintptr_t>(processor->getQueryPlanStep());
processor_elem.plan_step_name = processor->getPlanStepName();
processor_elem.plan_step_description = processor->getPlanStepDescription();
processor_elem.plan_group = processor->getQueryPlanStepGroup();
processor_elem.processor_name = processor->getName();
@ -793,7 +795,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// Verify that AST formatting is consistent:
/// If you format AST, parse it back, and format it again, you get the same string.
String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false);
String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks);
/// The query can become more verbose after formatting, so:
size_t new_max_query_size = max_query_size > 0 ? (1000 + 2 * max_query_size) : 0;
@ -818,7 +820,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
chassert(ast2);
String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false);
String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks);
if (formatted1 != formatted2)
throw Exception(ErrorCodes::LOGICAL_ERROR,

View File

@ -26,7 +26,12 @@ inline String format(const SecretHidingFormatSettings & settings)
&& settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect);
return settings.query.formatWithPossiblyHidingSensitiveData(
settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names);
settings.max_length,
settings.one_line,
show_secrets,
settings.ctx->getSettingsRef().print_pretty_type_names,
settings.ctx->getSettingsRef().output_format_always_quote_identifiers,
settings.ctx->getSettingsRef().output_format_identifier_quoting_style);
}
}

View File

@ -66,8 +66,8 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo
{
frame.need_parens = false;
/// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query.
format_settings.ostr << backQuote(name);
/// We have to always quote column names to avoid ambiguity with INDEX and other declarations in CREATE query.
format_settings.quoteIdentifier(name);
if (type)
{

View File

@ -35,7 +35,7 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin
{
frame.need_parens = false;
settings.ostr << backQuote(name);
settings.quoteIdentifier(name);
if (type)
{

View File

@ -79,7 +79,7 @@ void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & sta
}
else
{
s.ostr << backQuoteIfNeed(name);
s.writeIdentifier(name);
s.ostr << " ";
expr->formatImpl(s, state, frame);
}

View File

@ -17,7 +17,7 @@ ASTPtr ASTProjectionDeclaration::clone() const
void ASTProjectionDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << backQuoteIfNeed(name);
settings.writeIdentifier(name);
std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
std::string nl_or_nothing = settings.one_line ? "" : "\n";
settings.ostr << settings.nl_or_ws << indent_str << "(" << nl_or_nothing;

View File

@ -22,10 +22,8 @@ ASTPtr ASTTableOverride::clone() const
return res;
}
void ASTTableOverride::formatImpl(const FormatSettings & settings_, FormatState & state, FormatStateStacked frame) const
void ASTTableOverride::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
FormatSettings settings = settings_;
settings.always_quote_identifiers = true;
String nl_or_nothing = settings.one_line ? "" : "\n";
String nl_or_ws = settings.one_line ? " " : "\n";
String hl_keyword = settings.hilite ? hilite_keyword : "";

View File

@ -165,12 +165,21 @@ size_t IAST::checkDepthImpl(size_t max_depth) const
return res;
}
String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const
String IAST::formatWithPossiblyHidingSensitiveData(
size_t max_length,
bool one_line,
bool show_secrets,
bool print_pretty_type_names,
bool always_quote_identifiers,
IdentifierQuotingStyle identifier_quoting_style) const
{
WriteBufferFromOwnString buf;
FormatSettings settings(buf, one_line);
settings.show_secrets = show_secrets;
settings.print_pretty_type_names = print_pretty_type_names;
settings.always_quote_identifiers = always_quote_identifiers;
settings.identifier_quoting_style = identifier_quoting_style;
format(settings);
return wipeSensitiveDataAndCutToLength(buf.str(), max_length);
}
@ -248,6 +257,34 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const
}
}
void IAST::FormatSettings::quoteIdentifier(const String & name) const
{
switch (identifier_quoting_style)
{
case IdentifierQuotingStyle::None:
{
writeBackQuotedString(name, ostr);
break;
}
case IdentifierQuotingStyle::Backticks:
{
writeBackQuotedString(name, ostr);
break;
}
case IdentifierQuotingStyle::DoubleQuotes:
{
writeDoubleQuotedString(name, ostr);
break;
}
case IdentifierQuotingStyle::BackticksMySQL:
{
writeBackQuotedStringMySQL(name, ostr);
break;
}
}
}
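For orientation, the quoted form each style produces for an identifier named created, inferred from the write helpers used above:

// quoteIdentifier("created") per IdentifierQuotingStyle:
//   None, Backticks -> `created`   (None deliberately falls back to backticks here)
//   DoubleQuotes    -> "created"
//   BackticksMySQL  -> `created`   (MySQL-style escaping: a '`' inside the name doubles to '``')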
void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const
{
String indent_str(indent, '-');

View File

@ -238,6 +238,9 @@ public:
}
void writeIdentifier(const String & name) const;
// Quote identifier `name` even when `always_quote_identifiers` is false.
// If `identifier_quoting_style` is `IdentifierQuotingStyle::None`, quote it with `IdentifierQuotingStyle::Backticks`
void quoteIdentifier(const String & name) const;
};
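A minimal call-site sketch of the distinction between the two helpers (the surrounding formatter code is assumed):

// writeIdentifier quotes only when always_quote_identifiers or the identifier's
// content requires it; quoteIdentifier quotes unconditionally.
settings.writeIdentifier(name); // may emit: name  or  `name`
settings.quoteIdentifier(name); // always emits a quoted form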
/// State. For example, a set of nodes can be remembered, which we already walk through.
@ -278,7 +281,13 @@ public:
/// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied.
/// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience.
String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const;
String formatWithPossiblyHidingSensitiveData(
size_t max_length,
bool one_line,
bool show_secrets,
bool print_pretty_type_names,
bool always_quote_identifiers,
IdentifierQuotingStyle identifier_quoting_style) const;
/** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent
* behaviour is due to the fact such functions are called from Client which knows nothing about
@ -287,12 +296,12 @@ public:
*/
String formatForLogging(size_t max_length = 0) const
{
return formatWithPossiblyHidingSensitiveData(max_length, true, false, false);
return formatWithPossiblyHidingSensitiveData(max_length, true, false, false, false, IdentifierQuotingStyle::Backticks);
}
String formatForErrorMessage() const
{
return formatWithPossiblyHidingSensitiveData(0, true, false, false);
return formatWithPossiblyHidingSensitiveData(0, true, false, false, false, IdentifierQuotingStyle::Backticks);
}
virtual bool hasSecretParts() const { return childrenHaveSecretParts(); }

View File

@ -231,47 +231,47 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest,
},
{
"CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl`\n(PARTITION BY toYYYYMM(created))",
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE `tbl`\n(\n PARTITION BY toYYYYMM(`created`)\n)"
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n PARTITION BY toYYYYMM(created)\n)"
},
{
"CREATE DATABASE db ENGINE=Foo TABLE OVERRIDE `tbl` (), TABLE OVERRIDE a (COLUMNS (_created DateTime MATERIALIZED now())), TABLE OVERRIDE b (PARTITION BY rand())",
"CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE `tbl`\n(\n\n),\nTABLE OVERRIDE `a`\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE `b`\n(\n PARTITION BY rand()\n)"
"CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE tbl\n(\n\n),\nTABLE OVERRIDE a\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE b\n(\n PARTITION BY rand()\n)"
},
{
"CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE tbl (COLUMNS (id UUID) PARTITION BY toYYYYMM(created))",
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(`created`)\n)"
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(created)\n)"
},
{
"CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))",
"CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n INDEX foo `foo` TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(`_staged` = 1, 'staging', toYYYYMM(`created`))\n)"
"CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n INDEX foo foo TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created))\n)"
},
{
"CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)",
"CREATE DATABASE db\nTABLE OVERRIDE `t1`\n(\n TTL `inserted` + toIntervalMonth(1)\n),\nTABLE OVERRIDE `t2`\n(\n TTL `inserted` + toIntervalMonth(2)\n)"
"CREATE DATABASE db\nTABLE OVERRIDE t1\n(\n TTL inserted + toIntervalMonth(1)\n),\nTABLE OVERRIDE t2\n(\n TTL inserted + toIntervalMonth(2)\n)"
},
{
"CREATE DATABASE db ENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw') SETTINGS allows_query_when_mysql_lost = 1 TABLE OVERRIDE tab3 (COLUMNS (_staged UInt8 MATERIALIZED 1) PARTITION BY (c3) TTL c3 + INTERVAL 10 minute), TABLE OVERRIDE tab5 (PARTITION BY (c3) TTL c3 + INTERVAL 10 minute)",
"CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE `tab3`\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY `c3`\n TTL `c3` + toIntervalMinute(10)\n),\nTABLE OVERRIDE `tab5`\n(\n PARTITION BY `c3`\n TTL `c3` + toIntervalMinute(10)\n)"
"CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE tab3\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n),\nTABLE OVERRIDE tab5\n(\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n)"
},
{
"CREATE DATABASE db TABLE OVERRIDE tbl (PARTITION BY toYYYYMM(created) COLUMNS (created DateTime CODEC(Delta)))",
"CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(`created`)\n)"
"CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(created)\n)"
},
{
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1",
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1",
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1"
},
{
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2",
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2",
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2"
},
{
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 TABLE OVERRIDE a (ORDER BY (id, version))",
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)"
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)"
},
{
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 COMMENT 'db comment' TABLE OVERRIDE a (ORDER BY (id, version))",
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)\nCOMMENT 'db comment'"
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)\nCOMMENT 'db comment'"
}
})));

View File

@ -1,5 +1,6 @@
#include <iostream>
#include <Processors/IProcessor.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Common/logger_useful.h>
#include <IO/WriteHelpers.h>
@ -9,6 +10,17 @@
namespace DB
{
void IProcessor::setQueryPlanStep(IQueryPlanStep * step, size_t group)
{
query_plan_step = step;
query_plan_step_group = group;
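/// Cache the step's name and description in the processor itself, so they remain available even if the step is destroyed before the processors-profile log is filled.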
if (step)
{
plan_step_name = step->getName();
plan_step_description = step->getStepDescription();
}
}
void IProcessor::cancel() noexcept
{

View File

@ -311,14 +311,12 @@ public:
constexpr static size_t NO_STREAM = std::numeric_limits<size_t>::max();
/// Step of QueryPlan from which processor was created.
void setQueryPlanStep(IQueryPlanStep * step, size_t group = 0)
{
query_plan_step = step;
query_plan_step_group = group;
}
void setQueryPlanStep(IQueryPlanStep * step, size_t group = 0);
IQueryPlanStep * getQueryPlanStep() const { return query_plan_step; }
size_t getQueryPlanStepGroup() const { return query_plan_step_group; }
const String & getPlanStepName() const { return plan_step_name; }
const String & getPlanStepDescription() const { return plan_step_description; }
uint64_t getElapsedNs() const { return elapsed_ns; }
uint64_t getInputWaitElapsedNs() const { return input_wait_elapsed_ns; }
@ -410,6 +408,8 @@ private:
IQueryPlanStep * query_plan_step = nullptr;
size_t query_plan_step_group = 0;
String plan_step_name;
String plan_step_description;
};

View File

@ -155,7 +155,7 @@ bool isPartitionKeySuitsGroupByKey(
return false;
/// We are interested only in calculations required to obtain group by keys (and not aggregate function arguments for example).
auto key_nodes = group_by_actions.findInOutpus(aggregating.getParams().keys);
auto key_nodes = group_by_actions.findInOutputs(aggregating.getParams().keys);
auto group_by_key_actions = ActionsDAG::cloneSubDAG(key_nodes, /*remove_aliases=*/ true);
const auto & gb_key_required_columns = group_by_key_actions.getRequiredColumnsNames();

View File

@ -83,7 +83,11 @@ void WriteBufferFromHTTPServerResponse::finishSendHeaders()
return;
if (!headers_started_sending)
{
if (compression_method != CompressionMethod::None)
response.set("Content-Encoding", toContentEncodingName(compression_method));
startSendHeaders();
}
writeHeaderSummary();
writeExceptionCode();
@ -105,7 +109,13 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
initialized = true;
if (compression_method != CompressionMethod::None)
response.set("Content-Encoding", toContentEncodingName(compression_method));
{
/// If we've already sent headers, just send the `Content-Encoding` down the socket directly
if (headers_started_sending)
socketSendStr("Content-Encoding: " + toContentEncodingName(compression_method) + "\r\n");
else
response.set("Content-Encoding", toContentEncodingName(compression_method));
}
startSendHeaders();
finishSendHeaders();
@ -177,8 +187,12 @@ void WriteBufferFromHTTPServerResponse::finalizeImpl()
/// If no body data just send header
startSendHeaders();
/// `finalizeImpl` must be idempotent, so set `initialized` here to avoid sending the header twice
if (!initialized && offset() && compression_method != CompressionMethod::None)
{
initialized = true;
socketSendStr("Content-Encoding: " + toContentEncodingName(compression_method) + "\r\n");
}
finishSendHeaders();
}

View File

@ -427,6 +427,7 @@ class CI:
pr_only=True,
# TODO: approach with reference job names does not work because the digest may not be calculated if the job was skipped in the workflow
# reference_job_name=JobNames.INTEGRATION_TEST_TSAN,
timeout=4 * 3600, # to be able to process many updated tests
),
JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE],

View File

@ -33,7 +33,7 @@ CLICKHOUSE_BINARY_PATH = "usr/bin/clickhouse"
CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-odbc-bridge"
CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-library-bridge"
FLAKY_TRIES_COUNT = 10 # run whole pytest several times
FLAKY_TRIES_COUNT = 3 # run whole pytest several times
FLAKY_REPEAT_COUNT = 5 # runs test case in single module several times
MAX_TIME_SECONDS = 3600
@ -782,47 +782,71 @@ class ClickhouseIntegrationTestsRunner:
logging.info("Found '%s' tests to run", " ".join(tests_to_run))
result_state = "success"
description_prefix = "No flaky tests: "
start = time.time()
logging.info("Starting check with retries")
final_retry = 0
logs = []
tries_num = 1 if should_fail else FLAKY_TRIES_COUNT
for i in range(tries_num):
if timeout_expired:
print("Timeout expired - break flaky check execution")
break
final_retry += 1
logging.info("Running tests for the %s time", i)
counters, tests_times, log_paths = self.try_run_test_group(
repo_path,
"bugfix" if should_fail else "flaky",
tests_to_run,
1,
1,
FLAKY_REPEAT_COUNT,
)
logs += log_paths
if counters["FAILED"]:
logging.info("Found failed tests: %s", " ".join(counters["FAILED"]))
description_prefix = "Failed tests found: "
result_state = "failure"
if not should_fail:
counters = {
"ERROR": [],
"PASSED": [],
"FAILED": [],
"SKIPPED": [],
"BROKEN": [],
"NOT_FAILED": [],
} # type: Dict
tests_times = defaultdict(float) # type: Dict
tests_log_paths = defaultdict(list)
id_counter = 0
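# Each test file now runs in its own group, so an unexpected failure fails fast for that test only (see the break below).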
for test_to_run in tests_to_run:
tries_num = 1 if should_fail else FLAKY_TRIES_COUNT
for i in range(tries_num):
if timeout_expired:
print("Timeout expired - break flaky check execution")
break
if counters["ERROR"]:
description_prefix = "Failed tests found: "
logging.info("Found error tests: %s", " ".join(counters["ERROR"]))
# NOTE "error" result state will restart the whole test task,
# so we use "failure" here
result_state = "failure"
if not should_fail:
final_retry += 1
logging.info("Running tests for the %s time", i)
group_counters, group_test_times, log_paths = self.try_run_test_group(
repo_path,
f"bugfix_{id_counter}" if should_fail else f"flaky{id_counter}",
[test_to_run],
1,
1,
FLAKY_REPEAT_COUNT,
)
id_counter = id_counter + 1
for counter, value in group_counters.items():
logging.info(
"Tests from group %s stats, %s count %s",
test_to_run,
counter,
len(value),
)
counters[counter] += value
for test_name, test_time in group_test_times.items():
tests_times[test_name] = test_time
tests_log_paths[test_name] = log_paths
if not should_fail and (
group_counters["FAILED"] or group_counters["ERROR"]
):
logging.info(
"Unexpected failure in group %s. Fail fast for current group",
test_to_run,
)
break
logging.info("Try is OK, all tests passed, going to clear env")
clear_ip_tables_and_restart_daemons()
logging.info("And going to sleep for some time")
if time.time() - start > MAX_TIME_SECONDS:
logging.info("Timeout reached, going to finish flaky check")
break
time.sleep(5)
if counters["FAILED"]:
logging.info("Found failed tests: %s", " ".join(counters["FAILED"]))
description_prefix = "Failed tests found: "
result_state = "failure"
if counters["ERROR"]:
description_prefix = "Failed tests found: "
logging.info("Found error tests: %s", " ".join(counters["ERROR"]))
# NOTE "error" result state will restart the whole test task,
# so we use "failure" here
result_state = "failure"
logging.info("Try is OK, all tests passed, going to clear env")
clear_ip_tables_and_restart_daemons()
logging.info("And going to sleep for some time")
time.sleep(5)
test_result = []
for state in ("ERROR", "FAILED", "PASSED", "SKIPPED"):
@ -833,13 +857,10 @@ class ClickhouseIntegrationTestsRunner:
else:
text_state = state
test_result += [
(
c + " (✕" + str(final_retry) + ")",
text_state,
f"{tests_times[c]:.2f}",
)
(c, text_state, f"{tests_times[c]:.2f}", tests_log_paths[c])
for c in counters[state]
]
status_text = description_prefix + ", ".join(
[
str(n).lower().replace("failed", "fail") + ": " + str(len(c))
@ -847,26 +868,50 @@ class ClickhouseIntegrationTestsRunner:
]
)
return result_state, status_text, test_result, logs
return result_state, status_text, test_result, tests_log_paths
def run_impl(self, repo_path, build_path):
stopwatch = Stopwatch()
if self.flaky_check or self.bugfix_validate_check:
return self.run_flaky_check(
repo_path, build_path, should_fail=self.bugfix_validate_check
result_state, status_text, test_result, tests_log_paths = (
self.run_flaky_check(
repo_path, build_path, should_fail=self.bugfix_validate_check
)
)
else:
result_state, status_text, test_result, tests_log_paths = (
self.run_normal_check(build_path, repo_path)
)
self._install_clickhouse(build_path)
if self.soft_deadline_time < time.time():
status_text = "Timeout, " + status_text
result_state = "failure"
if timeout_expired:
logging.error(
"Job killed by external timeout signal - setting status to failure!"
)
status_text = "Job timeout expired, " + status_text
result_state = "failure"
# add mock test case to make timeout visible in job report and in ci db
test_result.insert(
0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "")
)
if "(memory)" in self.params["context_name"]:
result_state = "success"
return result_state, status_text, test_result, tests_log_paths
def run_normal_check(self, build_path, repo_path):
self._install_clickhouse(build_path)
logging.info("Pulling images")
self._pre_pull_images(repo_path)
logging.info(
"Dump iptables before run %s",
subprocess.check_output("sudo iptables -nvL", shell=True),
)
all_tests = self._get_all_tests(repo_path)
if self.run_by_hash_total != 0:
grouped_tests = self.group_test_by_file(all_tests)
all_filtered_by_hash_tests = []
@ -874,7 +919,6 @@ class ClickhouseIntegrationTestsRunner:
if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num:
all_filtered_by_hash_tests += tests_in_group
all_tests = all_filtered_by_hash_tests
parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path)
logging.info(
"Found %s tests first 3 %s", len(all_tests), " ".join(all_tests[:3])
@ -906,14 +950,12 @@ class ClickhouseIntegrationTestsRunner:
len(not_found_tests),
" ".join(not_found_tests[:3]),
)
grouped_tests = self.group_test_by_file(filtered_sequential_tests)
i = 0
for par_group in chunks(filtered_parallel_tests, PARALLEL_GROUP_SIZE):
grouped_tests[f"parallel{i}"] = par_group
i += 1
logging.info("Found %s tests groups", len(grouped_tests))
counters = {
"ERROR": [],
"PASSED": [],
@ -924,14 +966,11 @@ class ClickhouseIntegrationTestsRunner:
} # type: Dict
tests_times = defaultdict(float)
tests_log_paths = defaultdict(list)
items_to_run = list(grouped_tests.items())
logging.info("Total test groups %s", len(items_to_run))
if self.shuffle_test_groups():
logging.info("Shuffling test groups")
random.shuffle(items_to_run)
for group, tests in items_to_run:
if timeout_expired:
print("Timeout expired - break tests execution")
@ -959,7 +998,6 @@ class ClickhouseIntegrationTestsRunner:
if len(counters["FAILED"]) + len(counters["ERROR"]) >= 20:
logging.info("Collected more than 20 failed/error tests, stopping")
break
if counters["FAILED"] or counters["ERROR"]:
logging.info(
"Overall status failure, because we have tests in FAILED or ERROR state"
@ -968,7 +1006,6 @@ class ClickhouseIntegrationTestsRunner:
else:
logging.info("Overall success!")
result_state = "success"
test_result = []
for state in (
"ERROR",
@ -988,33 +1025,14 @@ class ClickhouseIntegrationTestsRunner:
(c, text_state, f"{tests_times[c]:.2f}", tests_log_paths[c])
for c in counters[state]
]
failed_sum = len(counters["FAILED"]) + len(counters["ERROR"])
status_text = f"fail: {failed_sum}, passed: {len(counters['PASSED'])}"
if self.soft_deadline_time < time.time():
status_text = "Timeout, " + status_text
result_state = "failure"
if timeout_expired:
logging.error(
"Job killed by external timeout signal - setting status to failure!"
)
status_text = "Job timeout expired, " + status_text
result_state = "failure"
# add mock test case to make timeout visible in job report and in ci db
test_result.insert(
0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "")
)
if not counters or sum(len(counter) for counter in counters.values()) == 0:
status_text = "No tests found for some reason! It's a bug"
result_state = "failure"
if "(memory)" in self.params["context_name"]:
result_state = "success"
return result_state, status_text, test_result, []
return result_state, status_text, test_result, tests_log_paths
def write_results(results_file, status_file, results, status):
@ -1047,7 +1065,9 @@ def run():
logging.info("Clearing dmesg before run")
subprocess.check_call("sudo -E dmesg --clear", shell=True)
state, description, test_results, _ = runner.run_impl(repo_path, build_path)
state, description, test_results, _test_log_paths = runner.run_impl(
repo_path, build_path
)
logging.info("Tests finished")
if IS_CI:

View File

@ -245,9 +245,9 @@ class PostgresManager:
):
postgres_database = self.database_or_default(postgres_database)
self.created_materialized_postgres_db_list.add(materialized_database)
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}")
self.instance.query(f"DROP DATABASE IF EXISTS `{materialized_database}`")
create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', '{user}', '{password}')"
create_query = f"CREATE DATABASE `{materialized_database}` ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', '{user}', '{password}')"
if len(settings) > 0:
create_query += " SETTINGS "
for i in range(len(settings)):
@ -259,7 +259,7 @@ class PostgresManager:
assert materialized_database in self.instance.query("SHOW DATABASES")
def drop_materialized_db(self, materialized_database="test_database"):
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} SYNC")
self.instance.query(f"DROP DATABASE IF EXISTS `{materialized_database}` SYNC")
if materialized_database in self.created_materialized_postgres_db_list:
self.created_materialized_postgres_db_list.remove(materialized_database)
@ -329,11 +329,15 @@ def assert_nested_table_is_created(
table = schema_name + "." + table_name
print(f"Checking table {table} exists in {materialized_database}")
database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
database_tables = instance.query(
f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'"
)
while table not in database_tables:
time.sleep(0.2)
database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
database_tables = instance.query(
f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'"
)
assert table in database_tables
@ -366,9 +370,9 @@ def check_tables_are_synchronized(
table_path = ""
if len(schema_name) == 0:
table_path = f"{materialized_database}.{table_name}"
table_path = f"`{materialized_database}`.`{table_name}`"
else:
table_path = f"{materialized_database}.`{schema_name}.{table_name}`"
table_path = f"`{materialized_database}`.`{schema_name}.{table_name}`"
print(f"Checking table is synchronized: {table_path}")
result_query = f"select * from {table_path} order by {order_by};"

View File

@ -73,3 +73,8 @@ def test_default_database_on_cluster(started_cluster):
database="test_default_database",
sql="SHOW CREATE test_local_table FORMAT TSV",
).endswith("old_parts_lifetime = 100\n")
ch1.query(
database="test_default_database",
sql="DROP TABLE test_local_table ON CLUSTER 'cluster' SYNC",
)

View File

@ -80,3 +80,6 @@ def test_replica_always_download(started_cluster):
assert int(node1_parts) < 10
assert int(node2_parts) < 10
node1.query_with_retry("DROP TABLE test_table SYNC")
node2.query_with_retry("DROP TABLE test_table SYNC")

View File

@ -104,7 +104,7 @@ def test_with_merge_tree():
_insert_queries_sequentially(
table_name,
_query_settings,
iterations=100,
iterations=10,
max_values_size=1000,
array_size_range=[10, 50],
)
@ -125,7 +125,7 @@ def test_with_merge_tree_multithread():
table_name,
_query_settings,
thread_num=15,
tasks=1000,
tasks=100,
max_values_size=1000,
array_size_range=[10, 15],
)
@ -152,12 +152,12 @@ def test_with_replicated_merge_tree():
_insert_queries_sequentially(
table_name,
settings,
iterations=100,
iterations=10,
max_values_size=1000,
array_size_range=[10, 50],
)
node.query("DROP TABLE IF EXISTS {}".format(table_name))
node.query("DROP TABLE {} SYNC".format(table_name))
def test_with_replicated_merge_tree_multithread():
@ -180,12 +180,12 @@ def test_with_replicated_merge_tree_multithread():
table_name,
_query_settings,
thread_num=15,
tasks=1000,
tasks=100,
max_values_size=1000,
array_size_range=[10, 15],
)
node.query("DROP TABLE IF EXISTS {}".format(table_name))
node.query("DROP TABLE {} SYNC".format(table_name))
# Ensure that the combined duration of inserts with adaptive timeouts is less than
@ -200,13 +200,13 @@ def test_compare_sequential_inserts_durations_for_adaptive_and_fixed_async_timeo
fixed_tm_settings = copy.copy(_query_settings)
fixed_tm_settings["async_insert_use_adaptive_busy_timeout"] = 0
fixed_tm_settings["async_insert_busy_timeout_ms"] = 200
fixed_tm_settings["async_insert_busy_timeout_ms"] = 100
fixed_tm_run_duration = timeit.timeit(
lambda: _insert_queries_sequentially(
fixed_tm_table_name,
fixed_tm_settings,
iterations=100,
iterations=50,
max_values_size=1000,
array_size_range=[10, 50],
),
@ -231,13 +231,13 @@ def test_compare_sequential_inserts_durations_for_adaptive_and_fixed_async_timeo
adaptive_tm_settings = copy.copy(_query_settings)
adaptive_tm_settings["async_insert_busy_timeout_min_ms"] = 10
adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 1000
adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 500
adaptive_tm_run_duration = timeit.timeit(
lambda: _insert_queries_sequentially(
adaptive_tm_table_name,
adaptive_tm_settings,
iterations=100,
iterations=50,
max_values_size=1000,
array_size_range=[10, 50],
),
@ -268,14 +268,14 @@ def test_compare_parallel_inserts_durations_for_adaptive_and_fixed_async_timeout
fixed_tm_settings = copy.copy(_query_settings)
fixed_tm_settings["async_insert_use_adaptive_busy_timeout"] = 0
fixed_tm_settings["async_insert_busy_timeout_ms"] = 200
fixed_tm_settings["async_insert_busy_timeout_ms"] = 500
fixed_tm_run_duration = timeit.timeit(
lambda: _insert_queries_in_parallel(
fixed_tm_table_name,
fixed_tm_settings,
thread_num=15,
tasks=1000,
tasks=150,
max_values_size=1000,
array_size_range=[10, 50],
),
@ -300,14 +300,14 @@ def test_compare_parallel_inserts_durations_for_adaptive_and_fixed_async_timeout
adaptive_tm_settings = copy.copy(_query_settings)
adaptive_tm_settings["async_insert_busy_timeout_min_ms"] = 10
adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 200
adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 500
adaptive_tm_run_duration = timeit.timeit(
lambda: _insert_queries_in_parallel(
adaptive_tm_table_name,
adaptive_tm_settings,
thread_num=15,
tasks=100,
tasks=150,
max_values_size=1000,
array_size_range=[10, 50],
),
@ -344,29 +344,34 @@ def test_change_queries_frequency():
settings = copy.copy(_query_settings)
min_ms = 50
settings["async_insert_busy_timeout_min_ms"] = min_ms
settings["async_insert_busy_timeout_max_ms"] = 2000
max_ms = 200
_insert_queries_in_parallel(
table_name,
settings,
thread_num=15,
tasks=2000,
max_values_size=1000,
array_size_range=[10, 15],
)
settings["async_insert_busy_timeout_min_ms"] = min_ms
settings["async_insert_busy_timeout_max_ms"] = max_ms
_insert_queries_sequentially(
table_name,
settings,
iterations=200,
iterations=50,
max_values_size=1000,
array_size_range=[10, 50],
)
select_log_query = "SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 50"
node.query("SYSTEM FLUSH LOGS")
select_log_query = f"SELECT countIf(timeout_milliseconds - {min_ms} < 25) FROM (SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 10)"
res = node.query(select_log_query)
for line in res.splitlines():
assert int(line) == min_ms
assert int(res) >= 5
node.query("DROP TABLE IF EXISTS {}".format(table_name))
_insert_queries_in_parallel(
table_name,
settings,
thread_num=10,
tasks=1000,
max_values_size=1000,
array_size_range=[10, 15],
)
node.query("SYSTEM FLUSH LOGS")
select_log_query = f"SELECT countIf({max_ms} - timeout_milliseconds < 100) FROM (SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 10)"
res = node.query(select_log_query)
assert int(res) >= 5
node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name))

View File

@ -28,9 +28,6 @@ def started_cluster():
"""
CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary;
CREATE DATABASE IF NOT EXISTS test;
DROP TABLE IF EXISTS test.elements;
CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log;
INSERT INTO test.elements VALUES (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7);
"""
)
@ -49,6 +46,13 @@ def get_status(dictionary_name):
def test_dict_get_data(started_cluster):
query = instance.query
query(
"CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log;"
)
query(
"INSERT INTO test.elements VALUES (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7);"
)
# dictionaries_lazy_load == false, so these dictionaries are not loaded.
assert get_status("dep_x") == "NOT_LOADED"
assert get_status("dep_y") == "NOT_LOADED"
@ -97,6 +101,8 @@ def test_dict_get_data(started_cluster):
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "XX\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ZZ\n"
query("DROP TABLE IF EXISTS test.elements;")
instance.restart_clickhouse()
def dependent_tables_assert():
@ -175,3 +181,5 @@ def test_multiple_tables(started_cluster):
random.shuffle(order)
for i in order:
assert query(f"select count() from test.table_{i}") == "100\n"
for i in range(tables_count):
query(f"drop table test.table_{i} sync")

View File

@ -26,26 +26,24 @@ def started_cluster():
# asynchronous_metric_update_period_s is being set to 2s so that the metrics are populated faster and
# are available for querying during the test.
def test_event_time_microseconds_field(started_cluster):
try:
cluster.start()
node1.query("SET log_queries = 1;")
node1.query("CREATE DATABASE replica;")
query_create = """CREATE TABLE replica.test
(
id Int64,
event_time DateTime
)
Engine=MergeTree()
PARTITION BY toYYYYMMDD(event_time)
ORDER BY id;"""
time.sleep(2)
node1.query(query_create)
node1.query("""INSERT INTO replica.test VALUES (1, now())""")
node1.query("SYSTEM FLUSH LOGS;")
node1.query("SET log_queries = 1;")
node1.query("CREATE DATABASE replica;")
query_create = """CREATE TABLE replica.test
(
id Int64,
event_time DateTime
)
Engine=MergeTree()
PARTITION BY toYYYYMMDD(event_time)
ORDER BY id;"""
time.sleep(2)
node1.query(query_create)
node1.query("""INSERT INTO replica.test VALUES (1, now())""")
node1.query("SYSTEM FLUSH LOGS;")
test_query = (
"SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log"
)
assert "ok\n" in node1.query(test_query)
finally:
cluster.shutdown()
test_query = (
"SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log"
)
assert "ok\n" in node1.query(test_query)
node1.query("DROP TABLE replica.test")
node1.query("DROP DATABASE replica")

View File

@ -79,3 +79,7 @@ def test_file_path_escaping(started_cluster):
"test -f /var/lib/clickhouse/shadow/2/store/123/12345678-1000-4000-8000-000000000001/1_1_1_0/%7EId.bin",
]
)
node.query("DROP TABLE test.`T.a_b,l-e!` SYNC")
node.query("DROP TABLE `test 2`.`T.a_b,l-e!` SYNC")
node.query("DROP DATABASE test")
node.query("DROP DATABASE `test 2`")

View File

@ -359,6 +359,8 @@ def test_implicit_create_view_grant():
instance.query("GRANT CREATE VIEW ON test.* TO B", user="A")
instance.query("CREATE VIEW test.view_2 AS SELECT 1", user="B")
assert instance.query("SELECT * FROM test.view_2") == "1\n"
instance.query("DROP USER A")
instance.query("DROP VIEW test.view_2")
def test_implicit_create_temporary_table_grant():
@ -530,6 +532,7 @@ def test_current_database():
assert "Not enough privileges" in instance.query_and_get_error(
"SELECT * FROM table", user="A"
)
instance.query("DROP TABLE default.table SYNC")
def test_grant_with_replace_option():

View File

@ -2353,7 +2353,7 @@ def table_overrides(clickhouse_node, mysql_node, service_name):
)
check_query(clickhouse_node, "SELECT count() FROM table_overrides.t1", "1001\n")
show_db = clickhouse_node.query("SHOW CREATE DATABASE table_overrides")
assert "TABLE OVERRIDE `t1`\\n(\\n\\n)" in show_db, show_db
assert "TABLE OVERRIDE t1\\n(\\n\\n)" in show_db, show_db
clickhouse_node.query("DROP DATABASE IF EXISTS table_overrides")
mysql_node.query("DROP DATABASE IF EXISTS table_overrides")

View File

@ -1,5 +1,5 @@
import pytest
import uuid
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
@ -25,19 +25,15 @@ def start_cluster():
def create_tables(cluster, table_name, skip_last_replica):
node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
node2.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
node1.query(
f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)"
f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)"
)
node2.query(
f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)"
f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)"
)
if not skip_last_replica:
node3.query(
f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)"
f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)"
)
# populate data
@ -67,7 +63,7 @@ def test_skip_replicas_without_table(start_cluster):
for i in range(4):
expected_result += f"{i}\t1000\n"
log_comment = "5230b069-9574-407d-9b80-891b5a175f41"
log_comment = uuid.uuid4()
assert (
node1.query(
f"SELECT key, count() FROM {table_name} GROUP BY key ORDER BY key",
@ -88,6 +84,8 @@ def test_skip_replicas_without_table(start_cluster):
)
== "1\t1\n"
)
node1.query(f"DROP TABLE {table_name} SYNC")
node2.query(f"DROP TABLE {table_name} SYNC")
def test_skip_unresponsive_replicas(start_cluster):
@ -112,3 +110,6 @@ def test_skip_unresponsive_replicas(start_cluster):
)
== expected_result
)
node1.query(f"DROP TABLE {table_name} SYNC")
node2.query(f"DROP TABLE {table_name} SYNC")
node3.query(f"DROP TABLE {table_name} SYNC")

View File

@ -35,11 +35,10 @@ def start_cluster():
def _create_tables(table_name, table_size, index_granularity):
nodes[0].query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER {cluster_name}")
nodes[0].query(
f"""
CREATE TABLE IF NOT EXISTS {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String)
CREATE TABLE {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String)
Engine=ReplicatedMergeTree('/test_parallel_replicas/shard/{table_name}/', '{{replica}}')
ORDER BY (key)
SETTINGS index_granularity = {index_granularity}, max_bytes_to_merge_at_max_space_in_pool = 0, max_bytes_to_merge_at_max_space_in_pool = 1
@ -128,3 +127,4 @@ def test_reading_with_invisible_parts(
)
== f"{expected}\n"
)
nodes[0].query(f"DROP TABLE {table_name} ON CLUSTER {cluster_name} SYNC")

View File

@ -1,5 +1,6 @@
import pytest
import uuid
import time
import psycopg2
import os.path as p
@ -59,7 +60,6 @@ instance2 = cluster.add_instance(
pg_manager = PostgresManager()
pg_manager2 = PostgresManager()
pg_manager_instance2 = PostgresManager()
pg_manager3 = PostgresManager()
@pytest.fixture(scope="module")
@ -82,12 +82,6 @@ def started_cluster():
pg_manager2.init(
instance2, cluster.postgres_ip, cluster.postgres_port, "postgres_database2"
)
pg_manager3.init(
instance,
cluster.postgres_ip,
cluster.postgres_port,
default_database="postgres-postgres",
)
yield cluster
@ -924,16 +918,27 @@ def test_failed_load_from_snapshot(started_cluster):
def test_symbols_in_publication_name(started_cluster):
table = "test_symbols_in_publication_name"
id = uuid.uuid4()
db = f"test_{id}"
table = f"test_symbols_in_publication_name"
pg_manager3 = PostgresManager()
pg_manager3.init(
instance,
cluster.postgres_ip,
cluster.postgres_port,
default_database=db,
)
pg_manager3.create_postgres_table(table)
instance.query(
f"INSERT INTO `{pg_manager3.get_default_database()}`.`{table}` SELECT number, number from numbers(0, 50)"
f"INSERT INTO `{db}`.`{table}` SELECT number, number from numbers(0, 50)"
)
pg_manager3.create_materialized_db(
ip=started_cluster.postgres_ip,
port=started_cluster.postgres_port,
materialized_database=db,
settings=[
f"materialized_postgresql_tables_list = '{table}'",
"materialized_postgresql_backoff_min_ms = 100",
@ -941,8 +946,10 @@ def test_symbols_in_publication_name(started_cluster):
],
)
check_tables_are_synchronized(
instance, table, postgres_database=pg_manager3.get_default_database()
instance, table, materialized_database=db, postgres_database=db
)
pg_manager3.drop_materialized_db(db)
pg_manager3.execute(f'drop table "{table}"')
def test_generated_columns(started_cluster):
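In the test above, the module-scoped pg_manager3 is gone: the test now builds its own manager against a uuid-derived database name and tears everything down itself, so reruns never collide on a shared database. A sketch of that isolate-and-clean-up shape, reusing only the PostgresManager calls visible above (the try/finally is added for illustration and is not part of the original test):

import uuid

def run_in_isolated_postgres_db(instance, cluster, table):
    db = f"test_{uuid.uuid4()}"
    manager = PostgresManager()
    manager.init(
        instance,
        cluster.postgres_ip,
        cluster.postgres_port,
        default_database=db,
    )
    try:
        manager.create_postgres_table(table)
        # ... insert data and check replication against db ...
    finally:
        # Explicit teardown: nothing outlives the test.
        manager.drop_materialized_db(db)
        manager.execute(f'drop table "{table}"')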

View File

@ -1,5 +1,6 @@
#!/usr/bin/env bash
# Tags: long, no-replicated-database, no-parallel, no-fasttest
# Tags: long, no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-msan, no-ubsan
# no sanitizers -- memory consumption is unpredictable with sanitizers
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,5 +1,5 @@
-- Tags: long, no-parallel
-- set no-parallel tag is to prevent timeout of this test
-- Tags: long, no-parallel, no-msan, no-tsan, no-asan
-- the no-parallel and no-sanitizer tags are set to prevent a timeout of this test
drop table if exists t;

View File

@ -9,7 +9,7 @@ written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing']
@ -26,7 +26,7 @@ written_rows: 4
written_bytes: 16
result_rows: 4
result_bytes: 16
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing']
@ -54,7 +54,7 @@ written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing','default.async_insert_target']
@ -71,7 +71,7 @@ written_rows: 6
written_bytes: 24
result_rows: 6
result_bytes: 24
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing','default.async_insert_target']
@ -118,7 +118,7 @@ written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing','default.async_insert_target']
@ -135,7 +135,7 @@ written_rows: 3
written_bytes: 12
result_rows: 0
result_bytes: 0
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing','default.async_insert_target']

View File

@ -1,6 +0,0 @@
42
4242
424242
select 42 clickhouse.default-1.sql
select 4242 clickhouse.default-2.sql
select 424242\n foo

View File

@ -1,24 +0,0 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment, because the test has to set its own
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
file1="$CUR_DIR/clickhouse.${CLICKHOUSE_DATABASE}-1.sql"
echo -n 'select 42' >> "$file1"
file2="$CUR_DIR/clickhouse.${CLICKHOUSE_DATABASE}-2.sql"
echo -n 'select 4242' >> "$file2"
$CLICKHOUSE_CLIENT --queries-file "$file1" "$file2" <<<'select 42'
$CLICKHOUSE_CLIENT --log_comment foo --queries-file /dev/stdin <<<'select 424242'
$CLICKHOUSE_CLIENT -m -q "
system flush logs;
select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 42' and type != 'QueryStart';
select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 4242' and type != 'QueryStart';
select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 424242\n' and type != 'QueryStart';
" | sed "s#$CUR_DIR/##"
rm "$file1" "$file2"

View File

@ -4,7 +4,7 @@ Row 1:
──────
database: default
table: async_insert_landing
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values
format: Values
error: DB::Exc*****on: Cannot parse string 'Invalid' as UInt32:
populated_flush_query_id: 1
@ -18,7 +18,7 @@ written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing']
@ -35,7 +35,7 @@ written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values
query: INSERT INTO default.async_insert_landing (id) SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values
query_kind: AsyncInsertFlush
databases: ['default']
tables: ['default.async_insert_landing']

View File

@ -0,0 +1,2 @@
< Content-Encoding: zstd
< Content-Encoding: zstd

View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"
# with progress
${CLICKHOUSE_CURL} -vsS "${URL}?send_progress_in_http_headers=1&enable_http_compression=1&wait_end_of_query=0" -o /dev/null \
-H 'Accept-Encoding: zstd' --compressed --data-binary @- <<< "select distinct sleep(.1),name from generateRandom('name String',1,1000,2) limit 100009 format TSV" 2>&1 \
| perl -lnE 'print if /Content-Encoding/';
# no progress
${CLICKHOUSE_CURL} -vsS "${URL}?send_progress_in_http_headers=0&enable_http_compression=1&wait_end_of_query=0" -o /dev/null \
-H 'Accept-Encoding: zstd' --compressed --data-binary @- <<< "select distinct sleep(.1),name from generateRandom('name String',1,1000,2) limit 100009 format TSV" 2>&1 \
| perl -lnE 'print if /Content-Encoding/';
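Both invocations should report exactly one Content-Encoding: zstd response header, whether or not progress is streamed via HTTP headers; the reference file above accordingly holds two such lines. A rough Python equivalent of one probe, assuming a server on localhost:8123 and substituting a simpler query than the original (http.client is standard library):

import http.client

def content_encoding(progress: int) -> str:
    conn = http.client.HTTPConnection("localhost", 8123)
    params = (
        f"send_progress_in_http_headers={progress}"
        "&enable_http_compression=1&wait_end_of_query=0"
    )
    conn.request(
        "POST",
        f"/?{params}",
        body="SELECT number FROM system.numbers LIMIT 100009 FORMAT TSV",
        headers={"Accept-Encoding": "zstd"},
    )
    resp = conn.getresponse()
    resp.read()  # drain the compressed body
    return resp.getheader("Content-Encoding")

print(content_encoding(1))  # expected: zstd
print(content_encoding(0))  # expected: zstd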

View File

@ -0,0 +1,121 @@
a0
a1
a10
a11
a12
a2
a3
a4
a5
a6
a7
a8
a9
('a0',['Array(Nullable(Int64))','Bool','Int64','String'])
('a1',['String'])
('a10',['Array(Nullable(Int64))','Bool','Int64','String'])
('a11',['Array(Nullable(Int64))','Bool','Int64','String'])
('a12',['Array(Nullable(Int64))','Bool','Int64','String'])
('a2',['Array(Nullable(Int64))','Bool','Int64','String'])
('a3',['Array(Nullable(Int64))','Bool','Int64','String'])
('a4',['Array(Nullable(Int64))','Bool','Int64','String'])
('a5',['Array(Nullable(Int64))','Bool','Int64','String'])
('a6',['Array(Nullable(Int64))','Bool','Int64','String'])
('a7',['Array(Nullable(Int64))','Bool','Int64','String'])
('a8',['Array(Nullable(Int64))','Bool','Int64','String'])
('a9',['Array(Nullable(Int64))','Bool','Int64','String'])
Array(Nullable(Int64))
Bool
Int64
String
Array(Nullable(Int64))
Bool
Int64
String
Filter
a1
a2
('a1',['String'])
('a2',['String'])
String
If
a1
a2
('a1',['String'])
('a2',['String'])
String
Group by
Array(Nullable(Int64)) ['a1','a2']
Bool ['a1','a2']
Int64 ['a1','a2']
None ['a0','a1','a10','a11','a12','a3','a4','a5','a6','a7','a8','a9']
String ['a1','a2']
Array(Nullable(Int64)) {'a1':['String'],'a2':['Array(Nullable(Int64))']}
Bool {'a1':['String'],'a2':['Bool']}
Int64 {'a1':['String'],'a2':['Int64']}
None {'a0':['Array(Nullable(Int64))','Bool','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Bool','Int64','String'],'a11':['Array(Nullable(Int64))','Bool','Int64','String'],'a12':['Array(Nullable(Int64))','Bool','Int64','String'],'a3':['Array(Nullable(Int64))','Bool','Int64','String'],'a4':['Array(Nullable(Int64))','Bool','Int64','String'],'a5':['Array(Nullable(Int64))','Bool','Int64','String'],'a6':['Array(Nullable(Int64))','Bool','Int64','String'],'a7':['Array(Nullable(Int64))','Bool','Int64','String'],'a8':['Array(Nullable(Int64))','Bool','Int64','String'],'a9':['Array(Nullable(Int64))','Bool','Int64','String']}
String {'a1':['String'],'a2':['String']}
Array(Nullable(Int64)) ['Array(Nullable(Int64))']
Bool ['Bool']
Int64 ['Int64']
None []
String ['String']
Remote
a0
a1
a10
a11
a12
a2
a3
a4
a5
a6
a7
a8
a9
('a0',['Array(Nullable(Int64))','Bool','Int64','String'])
('a1',['String'])
('a10',['Array(Nullable(Int64))','Bool','Int64','String'])
('a11',['Array(Nullable(Int64))','Bool','Int64','String'])
('a12',['Array(Nullable(Int64))','Bool','Int64','String'])
('a2',['Array(Nullable(Int64))','Bool','Int64','String'])
('a3',['Array(Nullable(Int64))','Bool','Int64','String'])
('a4',['Array(Nullable(Int64))','Bool','Int64','String'])
('a5',['Array(Nullable(Int64))','Bool','Int64','String'])
('a6',['Array(Nullable(Int64))','Bool','Int64','String'])
('a7',['Array(Nullable(Int64))','Bool','Int64','String'])
('a8',['Array(Nullable(Int64))','Bool','Int64','String'])
('a9',['Array(Nullable(Int64))','Bool','Int64','String'])
Array(Nullable(Int64))
Bool
Int64
String
Remote filter
a1
a2
('a1',['String'])
('a2',['String'])
String
Remote if
a1
a2
('a1',['String'])
('a2',['String'])
String
Remote group by
Array(Nullable(Int64)) ['a1','a2']
Bool ['a1','a2']
Int64 ['a1','a2']
None ['a0','a1','a10','a11','a12','a3','a4','a5','a6','a7','a8','a9']
String ['a1','a2']
Array(Nullable(Int64)) {'a1':['String'],'a2':['Array(Nullable(Int64))']}
Bool {'a1':['String'],'a2':['Bool']}
Int64 {'a1':['String'],'a2':['Int64']}
None {'a0':['Array(Nullable(Int64))','Bool','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Bool','Int64','String'],'a11':['Array(Nullable(Int64))','Bool','Int64','String'],'a12':['Array(Nullable(Int64))','Bool','Int64','String'],'a3':['Array(Nullable(Int64))','Bool','Int64','String'],'a4':['Array(Nullable(Int64))','Bool','Int64','String'],'a5':['Array(Nullable(Int64))','Bool','Int64','String'],'a6':['Array(Nullable(Int64))','Bool','Int64','String'],'a7':['Array(Nullable(Int64))','Bool','Int64','String'],'a8':['Array(Nullable(Int64))','Bool','Int64','String'],'a9':['Array(Nullable(Int64))','Bool','Int64','String']}
String {'a1':['String'],'a2':['String']}
Array(Nullable(Int64)) ['Array(Nullable(Int64))']
Bool ['Bool']
Int64 ['Int64']
None []
String ['String']

View File

@ -0,0 +1,63 @@
-- Tags: long
set allow_experimental_dynamic_type = 1;
set allow_experimental_json_type = 1;
set allow_experimental_variant_type = 1;
set use_variant_as_common_type = 1;
set max_block_size = 10000;
drop table if exists test_json_dynamic_aggregate_functions;
create table test_json_dynamic_aggregate_functions (json JSON(a1 String, max_dynamic_paths=2, max_dynamic_types=2)) engine=Memory;
insert into test_json_dynamic_aggregate_functions select toJSONString(map('a' || number % 13, multiIf(number % 5 == 0, NULL, number % 5 == 1, number::UInt32, number % 5 == 2, 'str_' || number, number % 5 == 3, range(number % 5), toBool(number % 2)))) from numbers(100000);
select arrayJoin(distinctJSONPaths(json)) from test_json_dynamic_aggregate_functions;
select arrayJoin(distinctJSONPathsAndTypes(json)) from test_json_dynamic_aggregate_functions;
select arrayJoin(distinctDynamicTypes(json.a2)) from test_json_dynamic_aggregate_functions;
select arrayJoin(distinctDynamicTypes(json.a3)) from test_json_dynamic_aggregate_functions;
select arrayJoin(distinctDynamicTypes(json.a42)) from test_json_dynamic_aggregate_functions;
select 'Filter';
select arrayJoin(distinctJSONPaths(json)) from test_json_dynamic_aggregate_functions where dynamicType(json.a2) == 'String';
select arrayJoin(distinctJSONPathsAndTypes(json)) from test_json_dynamic_aggregate_functions where dynamicType(json.a2) == 'String';
select arrayJoin(distinctDynamicTypes(json.a2)) from test_json_dynamic_aggregate_functions where dynamicType(json.a2) == 'String';
select 'If';
select arrayJoin(distinctJSONPathsIf(json, dynamicType(json.a2) == 'String')) from test_json_dynamic_aggregate_functions;
select arrayJoin(distinctJSONPathsAndTypesIf(json, dynamicType(json.a2) == 'String')) from test_json_dynamic_aggregate_functions;
select arrayJoin(distinctDynamicTypesIf(json.a2, dynamicType(json.a2) == 'String')) from test_json_dynamic_aggregate_functions;
select 'Group by';
select dynamicType(json.a2), distinctJSONPaths(json) from test_json_dynamic_aggregate_functions group by dynamicType(json.a2) order by dynamicType(json.a2);
select dynamicType(json.a2), distinctJSONPathsAndTypes(json) from test_json_dynamic_aggregate_functions group by dynamicType(json.a2) order by dynamicType(json.a2);
select dynamicType(json.a2), distinctDynamicTypes(json.a2) from test_json_dynamic_aggregate_functions group by dynamicType(json.a2) order by dynamicType(json.a2);
select 'Remote';
select arrayJoin(distinctJSONPaths(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions);
select arrayJoin(distinctJSONPathsAndTypes(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions);
select arrayJoin(distinctDynamicTypes(json.a2)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions);
select 'Remote filter';
select arrayJoin(distinctJSONPaths(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) where dynamicType(json.a2) == 'String';
select arrayJoin(distinctJSONPathsAndTypes(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) where dynamicType(json.a2) == 'String';
select arrayJoin(distinctDynamicTypes(json.a2)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) where dynamicType(json.a2) == 'String';
select 'Remote if';
select arrayJoin(distinctJSONPathsIf(json, dynamicType(json.a2) == 'String')) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions);
select arrayJoin(distinctJSONPathsAndTypesIf(json, dynamicType(json.a2) == 'String')) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions);
select arrayJoin(distinctDynamicTypesIf(json.a2, dynamicType(json.a2) == 'String')) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions);
select 'Remote group by';
select dynamicType(json.a2), distinctJSONPaths(json) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2);
select dynamicType(json.a2), distinctJSONPathsAndTypes(json) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2);
select dynamicType(json.a2), distinctDynamicTypes(json.a2) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2);
select distinctJSONPaths() from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select distinctJSONPaths(json, 42) from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select distinctJSONPaths(42) from test_json_dynamic_aggregate_functions; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
select distinctJSONPathsAndTypes() from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select distinctJSONPathsAndTypes(json, 42) from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select distinctJSONPathsAndTypes(42) from test_json_dynamic_aggregate_functions; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
select distinctDynamicTypes() from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select distinctDynamicTypes(json.a2, 42) from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select distinctDynamicTypes(42) from test_json_dynamic_aggregate_functions; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
drop table test_json_dynamic_aggregate_functions;
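The aggregate functions exercised above have straightforward set semantics: distinctJSONPaths collects the sorted set of paths seen in a JSON column, distinctJSONPathsAndTypes pairs each path with the sorted set of dynamic types it held, and distinctDynamicTypes does the same for a single dynamic subcolumn. A plain-Python model of the first two, with rows represented as {path: type_name} dicts (an illustration of the semantics, not the server implementation):

from collections import defaultdict

def distinct_json_paths(rows):
    # Union of all paths across rows, sorted as in the reference output.
    return sorted({path for row in rows for path in row})

def distinct_json_paths_and_types(rows):
    types = defaultdict(set)
    for row in rows:
        for path, type_name in row.items():
            types[path].add(type_name)
    return [(path, sorted(t)) for path, t in sorted(types.items())]

rows = [{"a1": "String", "a2": "Int64"}, {"a2": "Bool"}]
print(distinct_json_paths(rows))            # ['a1', 'a2']
print(distinct_json_paths_and_types(rows))  # [('a1', ['String']), ('a2', ['Bool', 'Int64'])]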

View File

@ -0,0 +1,8 @@
42 24 0
42 24 0
43 34 55
42 24
43 34
INSERT INTO default.t_async_insert_alter (id, v1) FORMAT Values Preprocessed Ok
INSERT INTO default.t_async_insert_alter (id, v1, value2) FORMAT Values Preprocessed Ok
INSERT INTO default.t_async_insert_alter (id, v1, value2) FORMAT Values Preprocessed FlushError

View File

@ -0,0 +1,47 @@
-- Tags: no-parallel
-- no-parallel because the test uses FLUSH ASYNC INSERT QUEUE
SET wait_for_async_insert = 0;
SET async_insert_busy_timeout_max_ms = 300000;
SET async_insert_busy_timeout_min_ms = 300000;
SET async_insert_use_adaptive_busy_timeout = 0;
DROP TABLE IF EXISTS t_async_insert_alter;
CREATE TABLE t_async_insert_alter (id Int64, v1 Int64) ENGINE = MergeTree ORDER BY id SETTINGS async_insert = 1;
-- ADD COLUMN
INSERT INTO t_async_insert_alter VALUES (42, 24);
ALTER TABLE t_async_insert_alter ADD COLUMN value2 Int64;
SYSTEM FLUSH ASYNC INSERT QUEUE;
SYSTEM FLUSH LOGS;
SELECT * FROM t_async_insert_alter ORDER BY id;
-- MODIFY COLUMN
INSERT INTO t_async_insert_alter VALUES (43, 34, 55);
ALTER TABLE t_async_insert_alter MODIFY COLUMN value2 String;
SYSTEM FLUSH ASYNC INSERT QUEUE;
SYSTEM FLUSH LOGS;
SELECT * FROM t_async_insert_alter ORDER BY id;
-- DROP COLUMN
INSERT INTO t_async_insert_alter VALUES ('100', '200', '300');
ALTER TABLE t_async_insert_alter DROP COLUMN value2;
SYSTEM FLUSH ASYNC INSERT QUEUE;
SYSTEM FLUSH LOGS;
SELECT * FROM t_async_insert_alter ORDER BY id;
SELECT query, data_kind, status FROM system.asynchronous_insert_log WHERE database = currentDatabase() AND table = 't_async_insert_alter' ORDER BY event_time_microseconds;
DROP TABLE t_async_insert_alter;
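The interesting interaction in this test is ordering: each INSERT is parked in the asynchronous insert queue with the column list it was parsed against, the ALTER changes the table underneath it, and only then does FLUSH ASYNC INSERT QUEUE try to write the buffered block. That is why the reference above ends with FlushError for the insert that still references value2 after the column has been dropped. A toy model of that failure mode (all names here are hypothetical; no real ClickHouse API is used):

class AsyncInsertQueue:
    """Toy model: buffered entries remember the columns they were parsed against."""

    def __init__(self, columns):
        self.columns = set(columns)
        self.pending = []

    def insert(self, row_columns, values):
        self.pending.append((tuple(row_columns), values))

    def drop_column(self, name):
        self.columns.discard(name)

    def flush(self):
        results = []
        for row_columns, values in self.pending:
            if set(row_columns) <= self.columns:
                results.append(("Ok", values))
            else:
                # The buffered block references a column that no longer exists.
                results.append(("FlushError", values))
        self.pending.clear()
        return results

q = AsyncInsertQueue(["id", "v1", "value2"])
q.insert(["id", "v1", "value2"], ("100", "200", "300"))
q.drop_column("value2")
print(q.flush())  # [('FlushError', ('100', '200', '300'))]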

View File

@ -0,0 +1,8 @@
42 24 0
42 24 0
43 34 55
42 24
43 34
INSERT INTO default.t_async_insert_alter (id, v1) FORMAT Values Parsed Ok
INSERT INTO default.t_async_insert_alter (id, v1, value2) FORMAT Values Parsed Ok
INSERT INTO default.t_async_insert_alter (id, v1, value2) FORMAT Values Parsed FlushError

View File

@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Tags: no-parallel
# no-parallel because the test uses FLUSH ASYNC INSERT QUEUE
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "
DROP TABLE IF EXISTS t_async_insert_alter;
CREATE TABLE t_async_insert_alter (id Int64, v1 Int64) ENGINE = MergeTree ORDER BY id SETTINGS async_insert = 1;
"
url="${CLICKHOUSE_URL}&async_insert=1&async_insert_busy_timeout_max_ms=300000&async_insert_busy_timeout_min_ms=300000&wait_for_async_insert=0&async_insert_use_adaptive_busy_timeout=0"
# ADD COLUMN
${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_async_insert_alter VALUES (42, 24)"
$CLICKHOUSE_CLIENT -q "
ALTER TABLE t_async_insert_alter ADD COLUMN value2 Int64;
SYSTEM FLUSH ASYNC INSERT QUEUE;
SYSTEM FLUSH LOGS;
SELECT * FROM t_async_insert_alter ORDER BY id;
"
# MODIFY COLUMN
${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_async_insert_alter VALUES (43, 34, 55)"
$CLICKHOUSE_CLIENT -q "
ALTER TABLE t_async_insert_alter MODIFY COLUMN value2 String;
SYSTEM FLUSH ASYNC INSERT QUEUE;
SYSTEM FLUSH LOGS;
SELECT * FROM t_async_insert_alter ORDER BY id;
"
# DROP COLUMN
${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_async_insert_alter VALUES ('100', '200', '300')"
$CLICKHOUSE_CLIENT -q "
ALTER TABLE t_async_insert_alter DROP COLUMN value2;
SYSTEM FLUSH ASYNC INSERT QUEUE;
SYSTEM FLUSH LOGS;
SELECT * FROM t_async_insert_alter ORDER BY id;
SELECT query, data_kind, status FROM system.asynchronous_insert_log WHERE database = currentDatabase() AND table = 't_async_insert_alter' ORDER BY event_time_microseconds;
DROP TABLE t_async_insert_alter;
"

View File

@ -0,0 +1,40 @@
CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid
CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month
CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid
CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month
CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE `default`.`uk_price_paid`\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX `county_index` `county` TYPE set(10) GRANULARITY 1,\n PROJECTION `town_date_projection`\n (\n SELECT \n `town`,\n `date`,\n `price`\n ORDER BY \n `town`,\n `date`\n ),\n PROJECTION `handy_aggs_projection`\n (\n SELECT \n avg(`price`),\n max(`price`),\n sum(`price`)\n GROUP BY `town`\n )\n)\nENGINE = MergeTree\nORDER BY (`postcode1`, `postcode2`, `date`)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW `default`.`prices_by_year_view` TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n `price`,\n `date`,\n `addr1`,\n `addr2`,\n `street`,\n `town`,\n `district`,\n `county`\nFROM `default`.`uk_price_paid`
CREATE TABLE `default`.`uk_prices_aggs_dest`\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `month`\nORDER BY `month`\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW `default`.`uk_prices_aggs_view` TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nAS WITH toStartOfMonth(`date`) AS `month`\nSELECT\n `month`,\n minSimpleState(`price`) AS `min_price`,\n maxSimpleState(`price`) AS `max_price`,\n countState(`price`) AS `volume`,\n avgState(`price`) AS `avg_price`\nFROM `default`.`uk_price_paid`\nGROUP BY `month`
CREATE DICTIONARY `default`.`uk_mortgage_rates_dict`\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY `date`\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid
CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month
CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE "default"."uk_price_paid"\n(\n "price" UInt32,\n "date" Date,\n "postcode1" LowCardinality(String),\n "postcode2" LowCardinality(String),\n "type" Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n "is_new" UInt8,\n "duration" Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "locality" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String),\n INDEX "county_index" "county" TYPE set(10) GRANULARITY 1,\n PROJECTION "town_date_projection"\n (\n SELECT \n "town",\n "date",\n "price"\n ORDER BY \n "town",\n "date"\n ),\n PROJECTION "handy_aggs_projection"\n (\n SELECT \n avg("price"),\n max("price"),\n sum("price")\n GROUP BY "town"\n )\n)\nENGINE = MergeTree\nORDER BY ("postcode1", "postcode2", "date")\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW "default"."prices_by_year_view" TO default.prices_by_year_dest\n(\n "price" UInt32,\n "date" Date,\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String)\n)\nAS SELECT\n "price",\n "date",\n "addr1",\n "addr2",\n "street",\n "town",\n "district",\n "county"\nFROM "default"."uk_price_paid"
CREATE TABLE "default"."uk_prices_aggs_dest"\n(\n "month" Date,\n "min_price" SimpleAggregateFunction("min", UInt32),\n "max_price" SimpleAggregateFunction("max", UInt32),\n "volume" AggregateFunction("count", UInt32),\n "avg_price" AggregateFunction("avg", UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY "month"\nORDER BY "month"\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW "default"."uk_prices_aggs_view" TO default.uk_prices_aggs_dest\n(\n "month" Date,\n "min_price" SimpleAggregateFunction("min", UInt32),\n "max_price" SimpleAggregateFunction("max", UInt32),\n "volume" AggregateFunction("count", UInt32),\n "avg_price" AggregateFunction("avg", UInt32)\n)\nAS WITH toStartOfMonth("date") AS "month"\nSELECT\n "month",\n minSimpleState("price") AS "min_price",\n maxSimpleState("price") AS "max_price",\n countState("price") AS "volume",\n avgState("price") AS "avg_price"\nFROM "default"."uk_price_paid"\nGROUP BY "month"
CREATE DICTIONARY "default"."uk_mortgage_rates_dict"\n(\n "date" DateTime64,\n "variable" Decimal32(2),\n "fixed" Decimal32(2),\n "bank" Decimal32(2)\n)\nPRIMARY KEY "date"\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE default.uk_price_paid\n(\n "price" UInt32,\n "date" Date,\n "postcode1" LowCardinality(String),\n "postcode2" LowCardinality(String),\n "type" Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n "is_new" UInt8,\n "duration" Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "locality" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n "price" UInt32,\n "date" Date,\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid
CREATE TABLE default.uk_prices_aggs_dest\n(\n "month" Date,\n "min_price" SimpleAggregateFunction(min, UInt32),\n "max_price" SimpleAggregateFunction(max, UInt32),\n "volume" AggregateFunction(count, UInt32),\n "avg_price" AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n "month" Date,\n "min_price" SimpleAggregateFunction(min, UInt32),\n "max_price" SimpleAggregateFunction(max, UInt32),\n "volume" AggregateFunction(count, UInt32),\n "avg_price" AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month
CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n "date" DateTime64,\n "variable" Decimal32(2),\n "fixed" Decimal32(2),\n "bank" Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE `default`.`uk_price_paid`\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX `county_index` `county` TYPE set(10) GRANULARITY 1,\n PROJECTION `town_date_projection`\n (\n SELECT \n `town`,\n `date`,\n `price`\n ORDER BY \n `town`,\n `date`\n ),\n PROJECTION `handy_aggs_projection`\n (\n SELECT \n avg(`price`),\n max(`price`),\n sum(`price`)\n GROUP BY `town`\n )\n)\nENGINE = MergeTree\nORDER BY (`postcode1`, `postcode2`, `date`)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW `default`.`prices_by_year_view` TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n `price`,\n `date`,\n `addr1`,\n `addr2`,\n `street`,\n `town`,\n `district`,\n `county`\nFROM `default`.`uk_price_paid`
CREATE TABLE `default`.`uk_prices_aggs_dest`\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `month`\nORDER BY `month`\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW `default`.`uk_prices_aggs_view` TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nAS WITH toStartOfMonth(`date`) AS `month`\nSELECT\n `month`,\n minSimpleState(`price`) AS `min_price`,\n maxSimpleState(`price`) AS `max_price`,\n countState(`price`) AS `volume`,\n avgState(`price`) AS `avg_price`\nFROM `default`.`uk_price_paid`\nGROUP BY `month`
CREATE DICTIONARY `default`.`uk_mortgage_rates_dict`\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY `date`\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())
CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid
CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192
CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month
CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED())

View File

@ -0,0 +1,328 @@
DROP DICTIONARY IF EXISTS uk_mortgage_rates_dict;
DROP TABLE IF EXISTS uk_mortgage_rates;
DROP VIEW IF EXISTS uk_prices_aggs_view;
DROP TABLE IF EXISTS uk_prices_aggs_dest;
DROP VIEW IF EXISTS prices_by_year_view;
DROP TABLE IF EXISTS prices_by_year_dest;
DROP TABLE IF EXISTS uk_price_paid;
-- Create tables, views, dictionaries
CREATE TABLE uk_price_paid
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
is_new UInt8,
duration Enum('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String),
INDEX county_index county TYPE set(10) GRANULARITY 1,
PROJECTION town_date_projection
(
SELECT
town,
date,
price
ORDER BY
town,
date
),
PROJECTION handy_aggs_projection
(
SELECT
avg(price),
max(price),
sum(price)
GROUP BY town
)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, date);
CREATE TABLE prices_by_year_dest (
price UInt32,
date Date,
addr1 String,
addr2 String,
street LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
PRIMARY KEY (town, date)
PARTITION BY toYear(date);
CREATE MATERIALIZED VIEW prices_by_year_view
TO prices_by_year_dest
AS
SELECT
price,
date,
addr1,
addr2,
street,
town,
district,
county
FROM uk_price_paid;
CREATE TABLE uk_prices_aggs_dest (
month Date,
min_price SimpleAggregateFunction(min, UInt32),
max_price SimpleAggregateFunction(max, UInt32),
volume AggregateFunction(count, UInt32),
avg_price AggregateFunction(avg, UInt32)
)
ENGINE = AggregatingMergeTree
PRIMARY KEY month;
CREATE MATERIALIZED VIEW uk_prices_aggs_view
TO uk_prices_aggs_dest
AS
WITH
toStartOfMonth(date) AS month
SELECT
month,
minSimpleState(price) AS min_price,
maxSimpleState(price) AS max_price,
countState(price) AS volume,
avgState(price) AS avg_price
FROM uk_price_paid
GROUP BY month;
CREATE TABLE uk_mortgage_rates (
date DateTime64,
variable Decimal32(2),
fixed Decimal32(2),
bank Decimal32(2)
)
ENGINE Memory();
INSERT INTO uk_mortgage_rates VALUES ('2004-02-29', 5.02, 4.9, 4);
INSERT INTO uk_mortgage_rates VALUES ('2004-03-31', 5.11, 4.91, 4);
CREATE DICTIONARY uk_mortgage_rates_dict (
date DateTime64,
variable Decimal32(2),
fixed Decimal32(2),
bank Decimal32(2)
)
PRIMARY KEY date
SOURCE(
CLICKHOUSE(TABLE 'uk_mortgage_rates')
)
LAYOUT(COMPLEX_KEY_HASHED())
LIFETIME(2628000000);
-- Show tables, views, dictionaries with default settings
SHOW CREATE TABLE uk_price_paid;
SHOW CREATE VIEW prices_by_year_view;
SHOW CREATE uk_prices_aggs_dest;
SHOW CREATE VIEW uk_prices_aggs_view;
SHOW CREATE DICTIONARY uk_mortgage_rates_dict;
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='None'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='None';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='None';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='None';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='None';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='None';
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=true, output_format_identifier_quoting_style='Backticks'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='Backticks';
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='Backticks'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='Backticks';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='Backticks';
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=true, output_format_identifier_quoting_style='DoubleQuotes'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='DoubleQuotes';
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='DoubleQuotes'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='DoubleQuotes';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='DoubleQuotes';
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=true, output_format_identifier_quoting_style='BackticksMySQL'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=true,
output_format_identifier_quoting_style='BackticksMySQL';
-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='BackticksMySQL'
SHOW CREATE TABLE uk_price_paid
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE VIEW prices_by_year_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE uk_prices_aggs_dest
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE VIEW uk_prices_aggs_view
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='BackticksMySQL';
SHOW CREATE DICTIONARY uk_mortgage_rates_dict
SETTINGS
output_format_always_quote_identifiers=false,
output_format_identifier_quoting_style='BackticksMySQL';
DROP DICTIONARY uk_mortgage_rates_dict;
DROP TABLE uk_mortgage_rates;
DROP VIEW uk_prices_aggs_view;
DROP TABLE uk_prices_aggs_dest;
DROP VIEW prices_by_year_view;
DROP TABLE prices_by_year_dest;
DROP TABLE uk_price_paid;
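The matrix of SHOW CREATE statements above varies two independent knobs: output_format_identifier_quoting_style picks the quote character (none, backticks, or double quotes), and output_format_always_quote_identifiers decides whether identifiers that do not strictly need quoting get quoted anyway. A simplified Python sketch of that decision, assuming escape-by-doubling and treating only [A-Za-z_][A-Za-z0-9_]* names as safe (the server's actual rules, e.g. for keywords, are stricter):

import re

def quote_identifier(name: str, style: str, always_quote: bool) -> str:
    if style == "None":
        return name  # matches the unquoted reference output above
    if not always_quote and re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name):
        return name
    quote = '"' if style == "DoubleQuotes" else "`"  # Backticks / BackticksMySQL
    return quote + name.replace(quote, quote * 2) + quote

print(quote_identifier("month", "Backticks", True))         # `month`
print(quote_identifier("month", "Backticks", False))        # month
print(quote_identifier("uk price", "DoubleQuotes", False))  # "uk price"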

View File

@ -10,7 +10,7 @@ SAMPLE BY intHash32(UserID)
SETTINGS storage_policy = 'default';
INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000
SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192, max_insert_threads = 1, max_threads = 1;
SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192, max_insert_threads = 1, max_threads = 1, max_parallel_replicas=1;
CREATE DATABASE IF NOT EXISTS db_dict;
DROP DICTIONARY IF EXISTS db_dict.cache_hits;

View File

@ -1564,6 +1564,11 @@ disjunctions
displayName
displaySecretsInShowAndSelect
distro
distinctdynamictypes
distinctDynamicTypes
distinctjsonpaths
distinctJSONPaths
distinctJSONPathsAndTypes
divideDecimal
dmesg
doesnt

View File

@ -1,5 +1,7 @@
v24.8.3.59-lts 2024-09-03
v24.8.2.3-lts 2024-08-22
v24.8.1.2684-lts 2024-08-21
v24.7.5.37-stable 2024-09-03
v24.7.4.51-stable 2024-08-23
v24.7.3.42-stable 2024-08-08
v24.7.2.13-stable 2024-08-01