Merge branch 'master' into rename_list_option

This commit is contained in:
alesapin 2022-03-19 20:16:57 +01:00
commit ac3c607ed4
158 changed files with 4645 additions and 1787 deletions

View File

@ -32,7 +32,7 @@ jobs:
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: ${{runner.temp}}/release_packages/*
file: ${{runner.temp}}/push_to_artifactory/*
overwrite: true
tag: ${{ github.ref }}
file_glob: true

View File

@ -1,4 +1,11 @@
### ClickHouse release v22.3-lts, 2022-03-17
### Table of Contents
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**<br>
## <a id="223"></a> ClickHouse release v22.3-lts, 2022-03-17
#### Backward Incompatible Change
@ -125,7 +132,7 @@
* Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)).
### ClickHouse release v22.2, 2022-02-17
### <a id="222"></a> ClickHouse release v22.2, 2022-02-17
#### Upgrade Notes
@ -301,7 +308,7 @@
* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)).
### ClickHouse release v22.1, 2022-01-18
### <a id="221"></a> ClickHouse release v22.1, 2022-01-18
#### Upgrade Notes

View File

@ -15,7 +15,7 @@ The following versions of ClickHouse server are currently being supported with s
| 20.x | :x: |
| 21.1 | :x: |
| 21.2 | :x: |
| 21.3 | |
| 21.3 | :x: |
| 21.4 | :x: |
| 21.5 | :x: |
| 21.6 | :x: |
@ -23,9 +23,11 @@ The following versions of ClickHouse server are currently being supported with s
| 21.8 | ✅ |
| 21.9 | :x: |
| 21.10 | :x: |
| 21.11 | |
| 21.12 | |
| 21.11 | :x: |
| 21.12 | :x: |
| 22.1 | ✅ |
| 22.2 | ✅ |
| 22.3 | ✅ |
## Reporting a Vulnerability

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54460)
SET(VERSION_REVISION 54461)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 3)
SET(VERSION_MINOR 4)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 75366fc95e510b7ac76759ef670702ae5f488a51)
SET(VERSION_DESCRIBE v22.3.1.1-testing)
SET(VERSION_STRING 22.3.1.1)
SET(VERSION_GITHASH 92ab33f560e638d1989c5ca543021ab53d110f5c)
SET(VERSION_DESCRIBE v22.4.1.1-testing)
SET(VERSION_STRING 22.4.1.1)
# end of autochange

View File

@ -51,6 +51,7 @@ The supported formats are:
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [ProtobufList](#protobuflist) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
@ -1230,7 +1231,38 @@ See also [how to read/write length-delimited protobuf messages in popular langua
## ProtobufSingle {#protobufsingle}
Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.
Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without a length delimiter.
As a result, only a single table row can be written/read.
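For illustration, a minimal query sketch for ProtobufSingle, reusing the hypothetical `test.table` and `schemafile:MessageType` names from the ProtobufList example below (note the `LIMIT 1`, since only one row can be serialized):
``` sql
SELECT * FROM test.table LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = 'schemafile:MessageType'
```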
## ProtobufList {#protobuflist}
Similar to Protobuf, but rows are represented as a sequence of sub-messages contained in a message with the fixed name "Envelope".
Usage example:
``` sql
SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType'
```
``` bash
cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'"
```
where the file `schemafile.proto` looks like this:
``` protobuf
syntax = "proto3";
message Envelope {
message MessageType {
string name = 1;
string surname = 2;
uint32 birthDate = 3;
repeated string phoneNumbers = 4;
};
MessageType row = 1;
};
```
## Avro {#data-format-avro}

View File

@ -3290,6 +3290,19 @@ Possible values:
Default value: `16`.
## max_insert_delayed_streams_for_parallel_write {#max-insert-delayed-streams-for-parallel-write}
The maximum number of streams (columns) to delay the final part flush.
It makes a difference only if the underlying storage supports parallel write (for example, S3); otherwise it gives no benefit.
Possible values:
- Positive integer.
- 0 or 1 — Disabled.
Default value: `1000` for S3 and `0` otherwise.
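As a sketch, the setting can be changed for a session or a single query; the table names below are hypothetical:
``` sql
SET max_insert_delayed_streams_for_parallel_write = 100;
INSERT INTO test.s3_table SELECT * FROM test.source_table;
```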
## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
Sets the probability that ClickHouse starts a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied).
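For example, to start a trace for roughly every tenth query in the current session (any value in `[0, 1]` can be used):
``` sql
SET opentelemetry_start_trace_probability = 0.1;
```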

View File

@ -225,15 +225,15 @@ This storage method works the same way as hashed and allows using date/time (arb
Example: The table contains discounts for each advertiser in the format:
``` text
+---------|-------------|-------------|------+
+---------------|---------------------|-------------------|--------+
| advertiser id | discount start date | discount end date | amount |
+===============+=====================+===================+========+
| 123 | 2015-01-01 | 2015-01-15 | 0.15 |
+---------|-------------|-------------|------+
+---------------|---------------------|-------------------|--------+
| 123 | 2015-01-16 | 2015-01-31 | 0.25 |
+---------|-------------|-------------|------+
+---------------|---------------------|-------------------|--------+
| 456 | 2015-01-01 | 2015-01-15 | 0.05 |
+---------|-------------|-------------|------+
+---------------|---------------------|-------------------|--------+
```
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain the `name` and `type` elements (if `type` is not specified, the default type Date is used). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
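The same structure can also be expressed with DDL. The following sketch is illustrative only; the dictionary name, source table, and lifetime values are hypothetical and mirror the discounts table above:
``` sql
CREATE DICTIONARY discounts_dict
(
    advertiser_id UInt64,
    discount_start_date Date,
    discount_end_date Date,
    amount Float64
)
PRIMARY KEY advertiser_id
SOURCE(CLICKHOUSE(TABLE 'discounts'))
LIFETIME(MIN 3600 MAX 7200)
LAYOUT(RANGE_HASHED())
RANGE(MIN discount_start_date MAX discount_end_date);
```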
@ -272,10 +272,10 @@ LAYOUT(RANGE_HASHED())
RANGE(MIN first MAX last)
```
To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected:
To work with these dictionaries, you need to pass an additional argument to the `dictGet*` function, for which a range is selected:
``` sql
dictGetT('dict_name', 'attr_name', id, date)
dictGet*('dict_name', 'attr_name', id, date)
```
This function returns the value for the specified `id`s and the date range that includes the passed date.
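A concrete call might look like this (using the hypothetical `discounts_dict` from the sketch above):
``` sql
SELECT dictGet('discounts_dict', 'amount', toUInt64(123), toDate('2015-01-14')) AS amount;
-- 0.15, the discount of advertiser 123 that is active on 2015-01-14
```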
@ -479,17 +479,17 @@ This type of storage is for mapping network prefixes (IP addresses) to metadata
Example: The table contains network prefixes and their corresponding AS number and country code:
``` text
+-----------|-----|------+
+-----------------|-------|--------+
| prefix | asn | cca2 |
+=================+=======+========+
| 202.79.32.0/20 | 17501 | NP |
+-----------|-----|------+
+-----------------|-------|--------+
| 2620:0:870::/48 | 3856 | US |
+-----------|-----|------+
+-----------------|-------|--------+
| 2a02:6b8:1::/48 | 13238 | RU |
+-----------|-----|------+
+-----------------|-------|--------+
| 2001:db8::/32 | 65536 | ZZ |
+-----------|-----|------+
+-----------------|-------|--------+
```
When using this type of layout, the structure must have a composite key.
@ -538,10 +538,10 @@ PRIMARY KEY prefix
The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet.
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys:
For queries, you must use the same functions (`dictGet*` with a tuple) as for dictionaries with composite keys:
``` sql
dictGetT('dict_name', 'attr_name', tuple(ip))
dictGet*('dict_name', 'attr_name', tuple(ip))
```
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6:
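For example, an IPv4 lookup could be written as follows; the dictionary name is hypothetical, and `IPv4StringToNum` produces the required `UInt32`:
``` sql
SELECT dictGet('prefix_dict', 'asn', tuple(IPv4StringToNum('202.79.32.1'))) AS asn;
-- 17501, matching the 202.79.32.0/20 prefix from the table above
```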

View File

@ -1392,12 +1392,24 @@ Returns the first element in the `arr1` array for which `func` returns something
Note that `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
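A short illustrative query (values chosen arbitrarily):
``` sql
SELECT arrayFirst(x -> x > 2, [1, 2, 3, 4]) AS res;
-- res = 3
```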
## arrayFirstOrNull(func, arr1, …) {#array-first-or-null}
Returns the first element in the `arr1` array for which `func` returns something other than 0. If there is no such element, it returns NULL.
Note that `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
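For example (arbitrary values):
``` sql
SELECT arrayFirstOrNull(x -> x > 10, [1, 2, 3]) AS res;
-- res = NULL, because no element is greater than 10
```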
## arrayLast(func, arr1, …) {#array-last}
Returns the last element in the `arr1` array for which `func` returns something other than 0.
Note that `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
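For example (arbitrary values):
``` sql
SELECT arrayLast(x -> x > 2, [1, 2, 3, 4]) AS res;
-- res = 4
```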
## arrayLastOrNull(func, arr1, …) {#array-last-or-null}
Returns the last element in the `arr1` array for which `func` returns something other than 0. If there is no such element, it returns NULL.
Note that `arrayLastOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
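For example (arbitrary values):
``` sql
SELECT arrayLastOrNull(x -> x > 10, [1, 2, 3]) AS res;
-- res = NULL, because no element is greater than 10
```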
## arrayFirstIndex(func, arr1, …) {#array-first-index}
Returns the index of the first element in the `arr1` array for which `func` returns something other than 0.
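An illustrative query (arbitrary values):
``` sql
SELECT arrayFirstIndex(x -> x > 2, [1, 2, 3, 4]) AS res;
-- res = 3, the 1-based index of the first matching element
```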

View File

@ -1026,4 +1026,185 @@ Result:
│ 41162 │
└─────────────┘
```
## h3PointDistM {#h3pointdistm}
Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in meters.
**Syntax**
``` sql
h3PointDistM(lat1, lon1, lat2, lon2)
```
**Arguments**
- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- Haversine or great circle distance in meters.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3PointDistM(-10.0, 0.0, 10.0, 0.0) AS h3PointDistM;
```
Result:
``` text
┌──────h3PointDistM─┐
│ 2223901.039504589 │
└───────────────────┘
```
## h3PointDistKm {#h3pointdistkm}
Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in kilometers.
**Syntax**
``` sql
h3PointDistKm(lat1, lon1, lat2, lon2)
```
**Arguments**
- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- Haversine or great circle distance in kilometers.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3PointDistKm(-10.0, 0.0, 10.0, 0.0) AS h3PointDistKm;
```
Result:
``` text
┌─────h3PointDistKm─┐
│ 2223.901039504589 │
└───────────────────┘
```
## h3PointDistRads {#h3pointdistrads}
Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in radians.
**Syntax**
``` sql
h3PointDistRads(lat1, lon1, lat2, lon2)
```
**Arguments**
- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- Haversine or great circle distance in radians.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3PointDistRads(-10.0, 0.0, 10.0, 0.0) AS h3PointDistRads;
```
Result:
``` text
┌────h3PointDistRads─┐
│ 0.3490658503988659 │
└────────────────────┘
```
## h3GetRes0Indexes {#h3getres0indexes}
Returns an array of all the resolution 0 H3 indexes.
**Syntax**
``` sql
h3GetRes0Indexes()
```
**Returned values**
- Array of all the resolution 0 H3 indexes.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3GetRes0Indexes() AS indexes;
```
Result:
``` text
┌─indexes─────────────────────────────────────┐
│ [576495936675512319,576531121047601151,....]│
└─────────────────────────────────────────────┘
```
## h3GetPentagonIndexes {#h3getpentagonindexes}
Returns all the pentagon H3 indexes at the specified resolution.
**Syntax**
``` sql
h3GetPentagonIndexes(resolution)
```
**Parameter**
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Array of all pentagon H3 indexes.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3GetPentagonIndexes(3) AS indexes;
```
Result:
``` text
┌─indexes────────────────────────────────────────────────────────┐
│ [590112357393367039,590464201114255359,590816044835143679,...] │
└────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) <!--hide-->

View File

@ -2,6 +2,49 @@
toc_priority: 76
toc_title: Security Changelog
---
## Fixed in ClickHouse 21.10.2.15, 2021-10-18 {#fixed-in-clickhouse-release-21-10-2-215-2021-10-18}
### CVE-2021-43304 {#cve-2021-43304}
Heap buffer overflow in ClickHouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop, and especially the arbitrary copy operation wildCopy<copy_amount>(op, ip, copy_end), don't exceed the destination buffer's limits.
Credits: JFrog Security Research Team
### CVE-2021-43305 {#cve-2021-43305}
Heap buffer overflow in ClickHouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop, and especially the arbitrary copy operation wildCopy<copy_amount>(op, ip, copy_end), don't exceed the destination buffer's limits. This issue is very similar to CVE-2021-43304, but the vulnerable copy operation is in a different wildCopy call.
Credits: JFrog Security Research Team
### CVE-2021-42387 {#cve-2021-42387}
Heap out-of-bounds read in ClickHouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the upper bounds of the source of the copy operation.
Credits: JFrog Security Research Team
### CVE-2021-42388 {#cve-2021-42388}
Heap out-of-bounds read in ClickHouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the lower bounds of the source of the copy operation.
Credits: JFrog Security Research Team
### CVE-2021-42389 {#cve-2021-42389}
Divide-by-zero in ClickHouse's Delta compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0.
Credits: JFrog Security Research Team
### CVE-2021-42390 {#cve-2021-42390}
Divide-by-zero in ClickHouse's DeltaDouble compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0.
Credits: JFrog Security Research Team
### CVE-2021-42391 {#cve-2021-42391}
Divide-by-zero in ClickHouse's Gorilla compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0.
Credits: JFrog Security Research Team
## Fixed in ClickHouse 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12}

View File

@ -1,10 +1,5 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# system.numbers_mt {#system-numbers-mt}
# 系统。numbers_mt {#system-numbers-mt}
一样的 [系统。数字](../../operations/system-tables/numbers.md) 但读取是并行的。 这些数字可以以任何顺序返回。
与[system.numbers](../../operations/system-tables/numbers.md)相似,但读取是并行的。 这些数字可以以任何顺序返回。
用于测试。

View File

@ -31,7 +31,7 @@
- 对于dict_name分层字典查找child_id键是否位于ancestor_id或匹配ancestor_id。返回UInt8。
## 独裁主义 {#dictgethierarchy}
## dictGetHierarchy {#dictgethierarchy}
`dictGetHierarchy('dict_name', id)`

View File

@ -2,6 +2,7 @@
#include <Columns/ColumnObject.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnSparse.h>
#include <DataTypes/ObjectUtils.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/DataTypeNothing.h>
@ -254,7 +255,7 @@ void ColumnObject::Subcolumn::insert(Field field)
void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
{
auto base_type = info.scalar_type;
auto base_type = std::move(info.scalar_type);
if (isNothing(base_type) && info.num_dimensions == 0)
{
@ -288,7 +289,8 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
if (data.empty())
{
data.push_back(value_type->createColumn());
auto serialization = value_type->getSerialization(ISerialization::Kind::SPARSE);
data.push_back(value_type->createColumn(*serialization));
least_common_type = value_type;
}
else if (!least_common_type->equals(*value_type))
@ -297,7 +299,8 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
type_changed = true;
if (!least_common_type->equals(*value_type))
{
data.push_back(value_type->createColumn());
auto serialization = value_type->getSerialization(ISerialization::Kind::SPARSE);
data.push_back(value_type->createColumn(*serialization));
least_common_type = value_type;
}
}
@ -340,11 +343,23 @@ void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t star
}
}
bool ColumnObject::Subcolumn::isFinalized() const
{
return data.empty() ||
(data.size() == 1 && !data[0]->isSparse() && num_of_defaults_in_prefix == 0);
}
void ColumnObject::Subcolumn::finalize()
{
if (isFinalized() || data.empty())
if (isFinalized())
return;
if (data.size() == 1 && num_of_defaults_in_prefix == 0)
{
data[0] = data[0]->convertToFullColumnIfSparse();
return;
}
const auto & to_type = least_common_type;
auto result_column = to_type->createColumn();
@ -353,6 +368,7 @@ void ColumnObject::Subcolumn::finalize()
for (auto & part : data)
{
part = part->convertToFullColumnIfSparse();
auto from_type = getDataTypeByColumn(*part);
size_t part_size = part->size();

View File

@ -66,7 +66,7 @@ public:
size_t byteSize() const;
size_t allocatedBytes() const;
bool isFinalized() const { return data.size() == 1 && num_of_defaults_in_prefix == 0; }
bool isFinalized() const;
const DataTypePtr & getLeastCommonType() const { return least_common_type; }
/// Checks the consistency of column's parts stored in @data.

View File

@ -194,7 +194,7 @@ void FileSegment::write(const char * from, size_t size)
{
std::lock_guard segment_lock(mutex);
LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLog());
LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock));
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
@ -405,7 +405,11 @@ void FileSegment::completeImpl(bool allow_non_strict_checking)
String FileSegment::getInfoForLog() const
{
std::lock_guard segment_lock(mutex);
return getInfoForLogImpl(segment_lock);
}
String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock) const
{
WriteBufferFromOwnString info;
info << "File segment: " << range().toString() << ", ";
info << "state: " << download_state << ", ";

View File

@ -130,6 +130,7 @@ private:
static String getCallerIdImpl(bool allow_non_strict_checking = false);
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
size_t getDownloadedSize(std::lock_guard<std::mutex> & segment_lock) const;
String getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock) const;
const Range segment_range;

View File

@ -127,6 +127,7 @@ PoolWithFailover::Entry PoolWithFailover::get()
/// If we cannot connect to some replica due to pool overflow, then we will wait and connect.
PoolPtr * full_pool = nullptr;
std::map<std::string, std::tuple<std::string, int>> error_detail;
for (size_t try_no = 0; try_no < max_tries; ++try_no)
{
@ -160,6 +161,15 @@ PoolWithFailover::Entry PoolWithFailover::get()
}
app.logger().warning("Connection to " + pool->getDescription() + " failed: " + e.displayText());
/// Save all errors to error_detail.
if (error_detail.contains(pool->getDescription()))
{
error_detail[pool->getDescription()] = {e.displayText(), e.code()};
}
else
{
error_detail.insert({pool->getDescription(), {e.displayText(), e.code()}});
}
continue;
}
@ -180,7 +190,14 @@ PoolWithFailover::Entry PoolWithFailover::get()
message << "Connections to all replicas failed: ";
for (auto it = replicas_by_priority.begin(); it != replicas_by_priority.end(); ++it)
for (auto jt = it->second.begin(); jt != it->second.end(); ++jt)
{
message << (it == replicas_by_priority.begin() && jt == it->second.begin() ? "" : ", ") << (*jt)->getDescription();
if (error_detail.contains((*jt)->getDescription()))
{
std::tuple<std::string, int> error_and_code = error_detail[(*jt)->getDescription()];
message << ", ERROR " << std::get<1>(error_and_code) << " : " << std::get<0>(error_and_code);
}
}
throw Poco::Exception(message.str());
}

View File

@ -44,6 +44,7 @@ class IColumn;
M(UInt64, min_insert_block_size_bytes_for_materialized_views, 0, "Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes)", 0) \
M(UInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
M(UInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay the final part flush. Default - auto (1000 if the underlying storage supports parallel write, for example S3, and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \

View File

@ -3,7 +3,7 @@
#include <DataTypes/Serializations/PathInData.h>
#include <DataTypes/IDataType.h>
#include <Columns/IColumn.h>
#include <unordered_map>
#include <Common/HashTable/HashMap.h>
namespace DB
{
@ -31,7 +31,8 @@ public:
Kind kind = TUPLE;
const Node * parent = nullptr;
std::map<String, std::shared_ptr<Node>, std::less<>> children;
Arena strings_pool;
HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, StringRefHash, 4> children;
NodeData data;
PathInData path;
@ -39,10 +40,11 @@ public:
bool isNested() const { return kind == NESTED; }
bool isScalar() const { return kind == SCALAR; }
void addChild(const String & key, std::shared_ptr<Node> next_node)
void addChild(std::string_view key, std::shared_ptr<Node> next_node)
{
next_node->parent = this;
children[key] = std::move(next_node);
StringRef key_ref{strings_pool.insert(key.data(), key.length()), key.length()};
children[key_ref] = std::move(next_node);
}
};
@ -83,10 +85,10 @@ public:
{
assert(current_node->kind != Node::SCALAR);
auto it = current_node->children.find(parts[i].key);
auto it = current_node->children.find(StringRef{parts[i].key});
if (it != current_node->children.end())
{
current_node = it->second.get();
current_node = it->getMapped().get();
node_creator(current_node->kind, true);
if (current_node->isNested() != parts[i].is_nested)
@ -101,7 +103,7 @@ public:
}
}
auto it = current_node->children.find(parts.back().key);
auto it = current_node->children.find(StringRef{parts.back().key});
if (it != current_node->children.end())
return false;
@ -192,11 +194,11 @@ private:
for (const auto & part : parts)
{
auto it = current_node->children.find(part.key);
auto it = current_node->children.find(StringRef{part.key});
if (it == current_node->children.end())
return find_exact ? nullptr : current_node;
current_node = it->second.get();
current_node = it->getMapped().get();
}
return current_node;

View File

@ -8,6 +8,7 @@
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <boost/algorithm/string/split.hpp>
@ -97,6 +98,8 @@ static DataTypePtr convertPostgreSQLDataType(String & type, Fn<void()> auto && r
res = std::make_shared<DataTypeDateTime64>(6);
else if (type == "date")
res = std::make_shared<DataTypeDate>();
else if (type == "uuid")
res = std::make_shared<DataTypeUUID>();
else if (type.starts_with("numeric"))
{
/// Numeric and decimal will both end up here as numeric. If it has type and precision,

View File

@ -1,6 +1,7 @@
#include "CassandraDictionarySource.h"
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
#include <Interpreters/Context.h>
namespace DB
{
@ -17,13 +18,17 @@ void registerDictionarySourceCassandra(DictionarySourceFactory & factory)
[[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
[[maybe_unused]] const std::string & config_prefix,
[[maybe_unused]] Block & sample_block,
ContextPtr /* global_context */,
[[maybe_unused]] ContextPtr global_context,
const std::string & /* default_database */,
bool /*created_from_ddl*/) -> DictionarySourcePtr
{
#if USE_CASSANDRA
setupCassandraDriverLibraryLogging(CASS_LOG_INFO);
return std::make_unique<CassandraDictionarySource>(dict_struct, config, config_prefix + ".cassandra", sample_block);
auto source_config_prefix = config_prefix + ".cassandra";
global_context->getRemoteHostFilter().checkHostAndPort(config.getString(source_config_prefix + ".host"), toString(config.getUInt(source_config_prefix + ".port", 0)));
return std::make_unique<CassandraDictionarySource>(dict_struct, config, source_config_prefix, sample_block);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Dictionary source of type `cassandra` is disabled because ClickHouse was built without cassandra support.");

View File

@ -8,6 +8,7 @@
#include <Poco/Redis/Command.h>
#include <Poco/Redis/Type.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
@ -40,15 +41,20 @@ namespace DB
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
Block & sample_block,
ContextPtr /* global_context */,
ContextPtr global_context,
const std::string & /* default_database */,
bool /* created_from_ddl */) -> DictionarySourcePtr {
auto redis_config_prefix = config_prefix + ".redis";
auto host = config.getString(redis_config_prefix + ".host");
auto port = config.getUInt(redis_config_prefix + ".port");
global_context->getRemoteHostFilter().checkHostAndPort(host, toString(port));
RedisDictionarySource::Configuration configuration =
{
.host = config.getString(redis_config_prefix + ".host"),
.port = static_cast<UInt16>(config.getUInt(redis_config_prefix + ".port")),
.host = host,
.port = static_cast<UInt16>(port),
.db_index = config.getUInt(redis_config_prefix + ".db_index", 0),
.password = config.getString(redis_config_prefix + ".password", ""),
.storage_type = parseStorageType(config.getString(redis_config_prefix + ".storage_type", "")),

View File

@ -248,6 +248,10 @@ public:
/// Overridden in remote fs disks.
virtual bool supportZeroCopyReplication() const = 0;
/// Whether this disk supports parallel write.
/// Overridden in remote fs disks.
virtual bool supportParallelWrite() const { return false; }
virtual bool isReadOnly() const { return false; }
/// Check if disk is broken. Broken disks will have 0 space and not be used.

View File

@ -4,7 +4,6 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromS3.h>
#include <IO/WriteHelpers.h>
#include <Common/createHardLink.h>
#include <Common/quoteString.h>

View File

@ -105,6 +105,8 @@ public:
bool supportZeroCopyReplication() const override { return true; }
bool supportParallelWrite() const override { return true; }
void shutdown() override;
void startup() override;

View File

@ -24,7 +24,9 @@ ProtobufSchemas & ProtobufSchemas::instance()
class ProtobufSchemas::ImporterWithSourceTree : public google::protobuf::compiler::MultiFileErrorCollector
{
public:
explicit ImporterWithSourceTree(const String & schema_directory) : importer(&disk_source_tree, this)
explicit ImporterWithSourceTree(const String & schema_directory, WithEnvelope with_envelope_)
: importer(&disk_source_tree, this)
, with_envelope(with_envelope_)
{
disk_source_tree.MapPath("", schema_directory);
}
@ -39,16 +41,33 @@ public:
return descriptor;
const auto * file_descriptor = importer.Import(schema_path);
// If there are parsing errors AddError() throws an exception and in this case the following line
// If there are parsing errors, AddError() throws an exception and in this case the following line
// isn't executed.
assert(file_descriptor);
descriptor = file_descriptor->FindMessageTypeByName(message_name);
if (!descriptor)
throw Exception(
"Not found a message named '" + message_name + "' in the schema file '" + schema_path + "'", ErrorCodes::BAD_ARGUMENTS);
if (with_envelope == WithEnvelope::No)
{
const auto * message_descriptor = file_descriptor->FindMessageTypeByName(message_name);
if (!message_descriptor)
throw Exception(
"Could not find a message named '" + message_name + "' in the schema file '" + schema_path + "'", ErrorCodes::BAD_ARGUMENTS);
return descriptor;
return message_descriptor;
}
else
{
const auto * envelope_descriptor = file_descriptor->FindMessageTypeByName("Envelope");
if (!envelope_descriptor)
throw Exception(
"Could not find a message named 'Envelope' in the schema file '" + schema_path + "'", ErrorCodes::BAD_ARGUMENTS);
const auto * message_descriptor = envelope_descriptor->FindNestedTypeByName(message_name); // silly protobuf API disallows restricting the field type to messages
if (!message_descriptor)
throw Exception(
"Could not find a message named '" + message_name + "' in the schema file '" + schema_path + "'", ErrorCodes::BAD_ARGUMENTS);
return message_descriptor;
}
}
private:
@ -63,18 +82,16 @@ private:
google::protobuf::compiler::DiskSourceTree disk_source_tree;
google::protobuf::compiler::Importer importer;
const WithEnvelope with_envelope;
};
ProtobufSchemas::ProtobufSchemas() = default;
ProtobufSchemas::~ProtobufSchemas() = default;
const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info)
const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope)
{
std::lock_guard lock(mutex);
auto it = importers.find(info.schemaDirectory());
if (it == importers.end())
it = importers.emplace(info.schemaDirectory(), std::make_unique<ImporterWithSourceTree>(info.schemaDirectory())).first;
it = importers.emplace(info.schemaDirectory(), std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), with_envelope)).first;
auto * importer = it->second.get();
return importer->import(info.schemaPath(), info.messageName());
}

View File

@ -28,14 +28,36 @@ class FormatSchemaInfo;
class ProtobufSchemas : private boost::noncopyable
{
public:
static ProtobufSchemas & instance();
enum class WithEnvelope
{
// Return descriptor for a top-level message with a user-provided name.
// Example: In protobuf schema
// message MessageType {
// string colA = 1;
// int32 colB = 2;
// }
// message_name = "MessageType" returns a descriptor. Used by IO
// formats Protobuf and ProtobufSingle.
No,
// Return descriptor for a message with a user-provided name one level
// below a top-level message with the hardcoded name "Envelope".
// Example: In protobuf schema
// message Envelope {
// message MessageType {
// string colA = 1;
// int32 colB = 2;
// }
// }
// message_name = "MessageType" returns a descriptor. Used by IO format
// ProtobufList.
Yes
};
ProtobufSchemas();
~ProtobufSchemas();
static ProtobufSchemas & instance();
/// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
/// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
const google::protobuf::Descriptor * getMessageTypeForFormatSchema(const FormatSchemaInfo & info);
const google::protobuf::Descriptor * getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope);
private:
class ImporterWithSourceTree;

View File

@ -2171,6 +2171,11 @@ namespace
field_index_by_field_tag.emplace(field_infos[i].field_tag, i);
}
void setHasEnvelopeAsParent()
{
has_envelope_as_parent = true;
}
void setColumns(const ColumnPtr * columns_, size_t num_columns_) override
{
if (!num_columns_)
@ -2217,7 +2222,7 @@ namespace
void writeRow(size_t row_num) override
{
if (parent_field_descriptor)
if (parent_field_descriptor || has_envelope_as_parent)
writer->startNestedMessage();
else
writer->startMessage();
@ -2236,13 +2241,17 @@ namespace
bool is_group = (parent_field_descriptor->type() == FieldTypeId::TYPE_GROUP);
writer->endNestedMessage(parent_field_descriptor->number(), is_group, should_skip_if_empty);
}
else if (has_envelope_as_parent)
{
writer->endNestedMessage(1, false, should_skip_if_empty);
}
else
writer->endMessage(with_length_delimiter);
}
void readRow(size_t row_num) override
{
if (parent_field_descriptor)
if (parent_field_descriptor || has_envelope_as_parent)
reader->startNestedMessage();
else
reader->startMessage(with_length_delimiter);
@ -2285,7 +2294,7 @@ namespace
}
}
if (parent_field_descriptor)
if (parent_field_descriptor || has_envelope_as_parent)
reader->endNestedMessage();
else
reader->endMessage(false);
@ -2375,6 +2384,7 @@ namespace
};
const FieldDescriptor * const parent_field_descriptor;
bool has_envelope_as_parent = false;
const bool with_length_delimiter;
const std::unique_ptr<RowInputMissingColumnsFiller> missing_columns_filler;
const bool should_skip_if_empty;
@ -2388,6 +2398,86 @@ namespace
size_t last_field_index = static_cast<size_t>(-1);
};
/// Serializes a top-level envelope message in the protobuf schema.
/// "Envelope" means that the contained subtree of serializers is enclosed in a message just once,
/// i.e. only the first and the last row read/write trigger a read/write of the message header.
class ProtobufSerializerEnvelope : public ProtobufSerializer
{
public:
ProtobufSerializerEnvelope(
std::unique_ptr<ProtobufSerializerMessage>&& serializer_,
const ProtobufReaderOrWriter & reader_or_writer_)
: serializer(std::move(serializer_))
, reader(reader_or_writer_.reader)
, writer(reader_or_writer_.writer)
{
// The inner serializer has a backreference of type protobuf::FieldDescriptor * to its parent
// serializer. If it is unset, it considers itself the top-level message, otherwise a nested
// message, and accordingly it makes start/endMessage() vs. start/endNestedMessage() calls into
// Protobuf(Writer|Reader). There is no field descriptor because Envelopes merely forward calls
// but don't contain data to be serialized. We must still force the inner serializer to act
// as a nested message.
serializer->setHasEnvelopeAsParent();
}
void setColumns(const ColumnPtr * columns_, size_t num_columns_) override
{
serializer->setColumns(columns_, num_columns_);
}
void setColumns(const MutableColumnPtr * columns_, size_t num_columns_) override
{
serializer->setColumns(columns_, num_columns_);
}
void writeRow(size_t row_num) override
{
if (first_call_of_write_row)
{
writer->startMessage();
first_call_of_write_row = false;
}
serializer->writeRow(row_num);
}
void finalizeWrite() override
{
writer->endMessage(/*with_length_delimiter = */ true);
}
void readRow(size_t row_num) override
{
if (first_call_of_read_row)
{
reader->startMessage(/*with_length_delimiter = */ true);
first_call_of_read_row = false;
}
int field_tag;
[[maybe_unused]] bool ret = reader->readFieldNumber(field_tag);
assert(ret);
serializer->readRow(row_num);
}
void insertDefaults(size_t row_num) override
{
serializer->insertDefaults(row_num);
}
void describeTree(WriteBuffer & out, size_t indent) const override
{
writeIndent(out, indent) << "ProtobufSerializerEnvelope ->\n";
serializer->describeTree(out, indent + 1);
}
std::unique_ptr<ProtobufSerializerMessage> serializer;
ProtobufReader * const reader;
ProtobufWriter * const writer;
bool first_call_of_write_row = true;
bool first_call_of_read_row = true;
};
/// Serializes a tuple with explicit names as a nested message.
class ProtobufSerializerTupleAsNestedMessage : public ProtobufSerializer
@ -2610,7 +2700,8 @@ namespace
const DataTypes & data_types,
std::vector<size_t> & missing_column_indices,
const MessageDescriptor & message_descriptor,
bool with_length_delimiter)
bool with_length_delimiter,
bool with_envelope)
{
root_serializer_ptr = std::make_shared<ProtobufSerializer *>();
get_root_desc_function = [root_serializer_ptr = root_serializer_ptr](size_t indent) -> String
@ -2648,13 +2739,23 @@ namespace
boost::range::set_difference(collections::range(column_names.size()), used_column_indices_sorted,
std::back_inserter(missing_column_indices));
*root_serializer_ptr = message_serializer.get();
if (!with_envelope)
{
*root_serializer_ptr = message_serializer.get();
#if 0
LOG_INFO(&Poco::Logger::get("ProtobufSerializer"), "Serialization tree:\n{}", get_root_desc_function(0));
LOG_INFO(&Poco::Logger::get("ProtobufSerializer"), "Serialization tree:\n{}", get_root_desc_function(0));
#endif
return message_serializer;
return message_serializer;
}
else
{
auto envelope_serializer = std::make_unique<ProtobufSerializerEnvelope>(std::move(message_serializer), reader_or_writer);
*root_serializer_ptr = envelope_serializer.get();
#if 0
LOG_INFO(&Poco::Logger::get("ProtobufSerializer"), "Serialization tree:\n{}", get_root_desc_function(0));
#endif
return envelope_serializer;
}
}
private:
@ -3337,9 +3438,10 @@ std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
std::vector<size_t> & missing_column_indices,
const google::protobuf::Descriptor & message_descriptor,
bool with_length_delimiter,
bool with_envelope,
ProtobufReader & reader)
{
return ProtobufSerializerBuilder(reader).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter);
return ProtobufSerializerBuilder(reader).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope);
}
std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
@ -3347,10 +3449,11 @@ std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
const DataTypes & data_types,
const google::protobuf::Descriptor & message_descriptor,
bool with_length_delimiter,
bool with_envelope,
ProtobufWriter & writer)
{
std::vector<size_t> missing_column_indices;
return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter);
return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope);
}
NamesAndTypesList protobufSchemaToCHSchema(const google::protobuf::Descriptor * message_descriptor)

View File

@ -26,6 +26,7 @@ public:
virtual void setColumns(const ColumnPtr * columns, size_t num_columns) = 0;
virtual void writeRow(size_t row_num) = 0;
virtual void finalizeWrite() {}
virtual void setColumns(const MutableColumnPtr * columns, size_t num_columns) = 0;
virtual void readRow(size_t row_num) = 0;
@ -39,6 +40,7 @@ public:
std::vector<size_t> & missing_column_indices,
const google::protobuf::Descriptor & message_descriptor,
bool with_length_delimiter,
bool with_envelope,
ProtobufReader & reader);
static std::unique_ptr<ProtobufSerializer> create(
@ -46,6 +48,7 @@ public:
const DataTypes & data_types,
const google::protobuf::Descriptor & message_descriptor,
bool with_length_delimiter,
bool with_envelope,
ProtobufWriter & writer);
};

View File

@ -36,6 +36,8 @@ void registerInputFormatJSONCompactEachRow(FormatFactory & factory);
void registerOutputFormatJSONCompactEachRow(FormatFactory & factory);
void registerInputFormatProtobuf(FormatFactory & factory);
void registerOutputFormatProtobuf(FormatFactory & factory);
void registerInputFormatProtobufList(FormatFactory & factory);
void registerOutputFormatProtobufList(FormatFactory & factory);
void registerInputFormatTemplate(FormatFactory & factory);
void registerOutputFormatTemplate(FormatFactory & factory);
void registerInputFormatMsgPack(FormatFactory & factory);
@ -98,6 +100,7 @@ void registerNativeSchemaReader(FormatFactory & factory);
void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory);
void registerAvroSchemaReader(FormatFactory & factory);
void registerProtobufSchemaReader(FormatFactory & factory);
void registerProtobufListSchemaReader(FormatFactory & factory);
void registerLineAsStringSchemaReader(FormatFactory & factory);
void registerJSONAsStringSchemaReader(FormatFactory & factory);
void registerRawBLOBSchemaReader(FormatFactory & factory);
@ -140,6 +143,8 @@ void registerFormats()
registerInputFormatJSONCompactEachRow(factory);
registerOutputFormatJSONCompactEachRow(factory);
registerInputFormatProtobuf(factory);
registerOutputFormatProtobufList(factory);
registerInputFormatProtobufList(factory);
registerOutputFormatProtobuf(factory);
registerInputFormatTemplate(factory);
registerOutputFormatTemplate(factory);
@ -199,6 +204,7 @@ void registerFormats()
registerRowBinaryWithNamesAndTypesSchemaReader(factory);
registerAvroSchemaReader(factory);
registerProtobufSchemaReader(factory);
registerProtobufListSchemaReader(factory);
registerLineAsStringSchemaReader(factory);
registerJSONAsStringSchemaReader(factory);
registerRawBLOBSchemaReader(factory);

View File

@ -1,4 +1,5 @@
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
@ -12,13 +13,19 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
enum class ArrayFirstLastStrategy
enum class ArrayFirstLastStrategy : uint8_t
{
First,
Last
};
template <ArrayFirstLastStrategy strategy>
enum class ArrayFirstLastElementNotExistsStrategy : uint8_t
{
Default,
Null
};
template <ArrayFirstLastStrategy strategy, ArrayFirstLastElementNotExistsStrategy element_not_exists_strategy>
struct ArrayFirstLastImpl
{
using column_type = ColumnArray;
@ -30,6 +37,9 @@ struct ArrayFirstLastImpl
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
{
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
return makeNullable(array_element);
return array_element;
}
@ -52,6 +62,16 @@ struct ArrayFirstLastImpl
out->reserve(data.size());
size_t offsets_size = offsets.size();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
{
col_null_map_to = ColumnUInt8::create(offsets_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
for (size_t offset_index = 0; offset_index < offsets_size; ++offset_index)
{
size_t start_offset = offsets[offset_index - 1];
@ -67,16 +87,29 @@ struct ArrayFirstLastImpl
else
{
out->insertDefault();
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
(*vec_null_map_to)[offset_index] = true;
}
}
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
return ColumnNullable::create(std::move(out), std::move(col_null_map_to));
return out;
}
else
{
auto out = array.getData().cloneEmpty();
out->insertDefault();
return out->replicate(IColumn::Offsets(1, array.size()));
out->insertManyDefaults(array.size());
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
{
auto col_null_map_to = ColumnUInt8::create(out->size(), true);
return ColumnNullable::create(std::move(out), std::move(col_null_map_to));
}
return out;
}
}
@ -87,6 +120,16 @@ struct ArrayFirstLastImpl
out->reserve(data.size());
size_t offsets_size = offsets.size();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
{
col_null_map_to = ColumnUInt8::create(offsets_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
for (size_t offset_index = 0; offset_index < offsets_size; ++offset_index)
{
size_t start_offset = offsets[offset_index - 1];
@ -120,25 +163,43 @@ struct ArrayFirstLastImpl
}
if (!exists)
{
out->insertDefault();
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
(*vec_null_map_to)[offset_index] = true;
}
}
if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null)
return ColumnNullable::create(std::move(out), std::move(col_null_map_to));
return out;
}
};
struct NameArrayFirst { static constexpr auto name = "arrayFirst"; };
using ArrayFirstImpl = ArrayFirstLastImpl<ArrayFirstLastStrategy::First>;
using ArrayFirstImpl = ArrayFirstLastImpl<ArrayFirstLastStrategy::First, ArrayFirstLastElementNotExistsStrategy::Default>;
using FunctionArrayFirst = FunctionArrayMapped<ArrayFirstImpl, NameArrayFirst>;
struct NameArrayFirstOrNull { static constexpr auto name = "arrayFirstOrNull"; };
using ArrayFirstOrNullImpl = ArrayFirstLastImpl<ArrayFirstLastStrategy::First, ArrayFirstLastElementNotExistsStrategy::Null>;
using FunctionArrayFirstOrNull = FunctionArrayMapped<ArrayFirstOrNullImpl, NameArrayFirstOrNull>;
struct NameArrayLast { static constexpr auto name = "arrayLast"; };
using ArrayLastImpl = ArrayFirstLastImpl<ArrayFirstLastStrategy::Last>;
using ArrayLastImpl = ArrayFirstLastImpl<ArrayFirstLastStrategy::Last, ArrayFirstLastElementNotExistsStrategy::Default>;
using FunctionArrayLast = FunctionArrayMapped<ArrayLastImpl, NameArrayLast>;
struct NameArrayLastOrNull { static constexpr auto name = "arrayLastOrNull"; };
using ArrayLastOrNullImpl = ArrayFirstLastImpl<ArrayFirstLastStrategy::Last, ArrayFirstLastElementNotExistsStrategy::Null>;
using FunctionArrayLastOrNull = FunctionArrayMapped<ArrayLastOrNullImpl, NameArrayLastOrNull>;
void registerFunctionArrayFirst(FunctionFactory & factory)
{
factory.registerFunction<FunctionArrayFirst>();
factory.registerFunction<FunctionArrayFirstOrNull>();
factory.registerFunction<FunctionArrayLast>();
factory.registerFunction<FunctionArrayLastOrNull>();
}
}

View File

@ -0,0 +1,118 @@
#include "config_functions.h"
#if USE_H3
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Common/typeid_cast.h>
#include <constants.h>
#include <h3api.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int ARGUMENT_OUT_OF_BOUND;
}
namespace
{
class FunctionH3GetPentagonIndexes : public IFunction
{
public:
static constexpr auto name = "h3GetPentagonIndexes";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionH3GetPentagonIndexes>(); }
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const auto * arg = arguments[0].get();
if (!WhichDataType(arg).isUInt8())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument {} of function {}. Must be UInt8",
arg->getName(), 1, getName());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
auto non_const_arguments = arguments;
for (auto & argument : non_const_arguments)
argument.column = argument.column->convertToFullColumnIfConst();
const auto * column = checkAndGetColumn<ColumnUInt8>(non_const_arguments[0].column.get());
if (!column)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt8.",
arguments[0].type->getName(),
1,
getName());
const auto & data = column->getData();
auto result_column_data = ColumnUInt64::create();
auto & result_data = result_column_data->getData();
auto result_column_offsets = ColumnArray::ColumnOffsets::create();
auto & result_offsets = result_column_offsets->getData();
result_offsets.resize(input_rows_count);
auto current_offset = 0;
std::vector<H3Index> hindex_vec;
result_data.reserve(input_rows_count);
for (size_t row = 0; row < input_rows_count; ++row)
{
if (data[row] > MAX_H3_RES)
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ",
toString(data[row]),
getName(),
MAX_H3_RES);
const auto vec_size = pentagonCount();
hindex_vec.resize(vec_size);
getPentagons(data[row], hindex_vec.data());
for (auto & i : hindex_vec)
{
++current_offset;
result_data.emplace_back(i);
}
result_offsets[row] = current_offset;
hindex_vec.clear();
}
return ColumnArray::create(std::move(result_column_data), std::move(result_column_offsets));
}
};
}
void registerFunctionH3GetPentagonIndexes(FunctionFactory & factory)
{
factory.registerFunction<FunctionH3GetPentagonIndexes>();
}
}
#endif

View File

@ -0,0 +1,72 @@
#include "config_functions.h"
#if USE_H3
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
#include <base/range.h>
#include <h3api.h>
namespace DB
{
namespace
{
class FunctionH3GetRes0Indexes final : public IFunction
{
public:
static constexpr auto name = "h3GetRes0Indexes";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionH3GetRes0Indexes>(); }
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override
{
if (input_rows_count == 0)
return result_type->createColumn();
std::vector<H3Index> res0_indexes;
const auto cell_count = res0CellCount();
res0_indexes.resize(cell_count);
getRes0Cells(res0_indexes.data());
auto res = ColumnArray::create(ColumnUInt64::create());
Array res_indexes;
res_indexes.insert(res_indexes.end(), res0_indexes.begin(), res0_indexes.end());
res->insert(res_indexes);
return result_type->createColumnConst(input_rows_count, res_indexes);
}
};
}
void registerFunctionH3GetRes0Indexes(FunctionFactory & factory)
{
factory.registerFunction<FunctionH3GetRes0Indexes>();
}
}
#endif

View File

@ -0,0 +1,153 @@
#include "config_functions.h"
#if USE_H3
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
#include <base/range.h>
#include <constants.h>
#include <h3api.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
}
namespace
{
template <class Impl>
class FunctionH3PointDist final : public IFunction
{
public:
static constexpr auto name = Impl::name;
static constexpr auto function = Impl::function;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionH3PointDist>(); }
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 4; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
for (size_t i = 0; i < getNumberOfArguments(); ++i)
{
const auto * arg = arguments[i].get();
if (!WhichDataType(arg).isFloat64())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument {} of function {}. Must be Float64",
arg->getName(), i, getName());
}
return std::make_shared<DataTypeFloat64>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
auto non_const_arguments = arguments;
for (auto & argument : non_const_arguments)
argument.column = argument.column->convertToFullColumnIfConst();
const auto * col_lat1 = checkAndGetColumn<ColumnFloat64>(non_const_arguments[0].column.get());
if (!col_lat1)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[0].type->getName(),
1,
getName());
const auto & data_lat1 = col_lat1->getData();
const auto * col_lon1 = checkAndGetColumn<ColumnFloat64>(non_const_arguments[1].column.get());
if (!col_lon1)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[1].type->getName(),
2,
getName());
const auto & data_lon1 = col_lon1->getData();
const auto * col_lat2 = checkAndGetColumn<ColumnFloat64>(non_const_arguments[2].column.get());
if (!col_lat2)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[2].type->getName(),
3,
getName());
const auto & data_lat2 = col_lat2->getData();
const auto * col_lon2 = checkAndGetColumn<ColumnFloat64>(non_const_arguments[3].column.get());
if (!col_lon2)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64",
arguments[3].type->getName(),
4,
getName());
const auto & data_lon2 = col_lon2->getData();
auto dst = ColumnVector<Float64>::create();
auto & dst_data = dst->getData();
dst_data.resize(input_rows_count);
for (size_t row = 0; row < input_rows_count; ++row)
{
const double lat1 = data_lat1[row];
const double lon1 = data_lon1[row];
const auto lat2 = data_lat2[row];
const auto lon2 = data_lon2[row];
LatLng point1 = {degsToRads(lat1), degsToRads(lon1)};
LatLng point2 = {degsToRads(lat2), degsToRads(lon2)};
// `function` is distanceM, distanceKm or distanceRads, depending on the Impl template parameter
Float64 res = function(&point1, &point2);
dst_data[row] = res;
}
return dst;
}
};
}
struct H3PointDistM
{
static constexpr auto name = "h3PointDistM";
static constexpr auto function = distanceM;
};
struct H3PointDistKm
{
static constexpr auto name = "h3PointDistKm";
static constexpr auto function = distanceKm;
};
struct H3PointDistRads
{
static constexpr auto name = "h3PointDistRads";
static constexpr auto function = distanceRads;
};
void registerFunctionH3PointDistM(FunctionFactory & factory) { factory.registerFunction<FunctionH3PointDist<H3PointDistM>>(); }
void registerFunctionH3PointDistKm(FunctionFactory & factory) { factory.registerFunction<FunctionH3PointDist<H3PointDistKm>>(); }
void registerFunctionH3PointDistRads(FunctionFactory & factory) { factory.registerFunction<FunctionH3PointDist<H3PointDistRads>>(); }
}
#endif
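For reference, a hedged sketch of how the three functions registered above can be invoked from SQL once this lands (the coordinates are arbitrary sample values, not taken from any test):
-- Argument order is (lat1, lon1, lat2, lon2), all Float64, in degrees.
SELECT
    h3PointDistM(-10.0, 0.0, 10.0, 0.0)    AS dist_m,
    h3PointDistKm(-10.0, 0.0, 10.0, 0.0)   AS dist_km,
    h3PointDistRads(-10.0, 0.0, 10.0, 0.0) AS dist_rads;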

View File

@ -0,0 +1,291 @@
#include <cfloat>
#include <cmath>
#include <boost/math/distributions/normal.hpp>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Interpreters/castColumn.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename Impl>
class FunctionMinSampleSize : public IFunction
{
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMinSampleSize<Impl>>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return Impl::num_args; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override
{
return ColumnNumbers(std::begin(Impl::const_args), std::end(Impl::const_args));
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
static DataTypePtr getReturnType()
{
auto float_64_type = std::make_shared<DataTypeNumber<Float64>>();
DataTypes types{
float_64_type,
float_64_type,
float_64_type,
};
Strings names{
"minimum_sample_size",
"detect_range_lower",
"detect_range_upper",
};
return std::make_shared<DataTypeTuple>(std::move(types), std::move(names));
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
Impl::validateArguments(arguments);
return getReturnType();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
return Impl::execute(arguments, input_rows_count);
}
};
static bool isBetweenZeroAndOne(Float64 v)
{
return v >= 0.0 && v <= 1.0 && fabs(v - 0.0) >= DBL_EPSILON && fabs(v - 1.0) >= DBL_EPSILON;
}
struct ContinousImpl
{
static constexpr auto name = "minSampleSizeContinous";
static constexpr size_t num_args = 5;
static constexpr size_t const_args[] = {2, 3, 4};
static void validateArguments(const DataTypes & arguments)
{
for (size_t i = 0; i < arguments.size(); ++i)
{
if (!isNativeNumber(arguments[i]))
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The {}th Argument of function {} must be a number.", i + 1, name);
}
}
}
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, size_t input_rows_count)
{
auto float_64_type = std::make_shared<DataTypeFloat64>();
auto baseline_argument = arguments[0];
baseline_argument.column = baseline_argument.column->convertToFullColumnIfConst();
auto baseline_column_untyped = castColumnAccurate(baseline_argument, float_64_type);
const auto * baseline_column = checkAndGetColumn<ColumnVector<Float64>>(*baseline_column_untyped);
const auto & baseline_column_data = baseline_column->getData();
auto sigma_argument = arguments[1];
sigma_argument.column = sigma_argument.column->convertToFullColumnIfConst();
auto sigma_column_untyped = castColumnAccurate(sigma_argument, float_64_type);
const auto * sigma_column = checkAndGetColumn<ColumnVector<Float64>>(*sigma_column_untyped);
const auto & sigma_column_data = sigma_column->getData();
const IColumn & col_mde = *arguments[2].column;
const IColumn & col_power = *arguments[3].column;
const IColumn & col_alpha = *arguments[4].column;
auto res_min_sample_size = ColumnFloat64::create();
auto & data_min_sample_size = res_min_sample_size->getData();
data_min_sample_size.reserve(input_rows_count);
auto res_detect_lower = ColumnFloat64::create();
auto & data_detect_lower = res_detect_lower->getData();
data_detect_lower.reserve(input_rows_count);
auto res_detect_upper = ColumnFloat64::create();
auto & data_detect_upper = res_detect_upper->getData();
data_detect_upper.reserve(input_rows_count);
/// Minimal Detectable Effect
const Float64 mde = col_mde.getFloat64(0);
/// Sufficient statistical power to detect a treatment effect
const Float64 power = col_power.getFloat64(0);
/// Significance level
const Float64 alpha = col_alpha.getFloat64(0);
boost::math::normal_distribution<> nd(0.0, 1.0);
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
/// Mean of control-metric
Float64 baseline = baseline_column_data[row_num];
/// Standard deviation of control-metric
Float64 sigma = sigma_column_data[row_num];
if (!std::isfinite(baseline) || !std::isfinite(sigma) || !isBetweenZeroAndOne(mde) || !isBetweenZeroAndOne(power)
|| !isBetweenZeroAndOne(alpha))
{
data_min_sample_size.emplace_back(std::numeric_limits<Float64>::quiet_NaN());
data_detect_lower.emplace_back(std::numeric_limits<Float64>::quiet_NaN());
data_detect_upper.emplace_back(std::numeric_limits<Float64>::quiet_NaN());
continue;
}
Float64 delta = baseline * mde;
using namespace boost::math;
/// https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a
/// \frac{2\sigma^{2} * (Z_{1 - alpha /2} + Z_{power})^{2}}{\Delta^{2}}
Float64 min_sample_size
= 2 * std::pow(sigma, 2) * std::pow(quantile(nd, 1.0 - alpha / 2) + quantile(nd, power), 2) / std::pow(delta, 2);
data_min_sample_size.emplace_back(min_sample_size);
data_detect_lower.emplace_back(baseline - delta);
data_detect_upper.emplace_back(baseline + delta);
}
return ColumnTuple::create(Columns{std::move(res_min_sample_size), std::move(res_detect_lower), std::move(res_detect_upper)});
}
};
struct ConversionImpl
{
static constexpr auto name = "minSampleSizeConversion";
static constexpr size_t num_args = 4;
static constexpr size_t const_args[] = {1, 2, 3};
static void validateArguments(const DataTypes & arguments)
{
size_t arguments_size = arguments.size();
for (size_t i = 0; i < arguments_size; ++i)
{
if (!isFloat(arguments[i]))
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The {}th argument of function {} must be a float.", i + 1, name);
}
}
}
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, size_t input_rows_count)
{
auto first_argument_column = castColumnAccurate(arguments[0], std::make_shared<DataTypeFloat64>());
if (const ColumnConst * const col_p1_const = checkAndGetColumnConst<ColumnVector<Float64>>(first_argument_column.get()))
{
const Float64 left_value = col_p1_const->template getValue<Float64>();
return process<true>(arguments, &left_value, input_rows_count);
}
else if (const ColumnVector<Float64> * const col_p1 = checkAndGetColumn<ColumnVector<Float64>>(first_argument_column.get()))
{
return process<false>(arguments, col_p1->getData().data(), input_rows_count);
}
else
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The first argument of function {} must be a float.", name);
}
}
template <bool const_p1>
static ColumnPtr process(const ColumnsWithTypeAndName & arguments, const Float64 * col_p1, const size_t input_rows_count)
{
const IColumn & col_mde = *arguments[1].column;
const IColumn & col_power = *arguments[2].column;
const IColumn & col_alpha = *arguments[3].column;
auto res_min_sample_size = ColumnFloat64::create();
auto & data_min_sample_size = res_min_sample_size->getData();
data_min_sample_size.reserve(input_rows_count);
auto res_detect_lower = ColumnFloat64::create();
auto & data_detect_lower = res_detect_lower->getData();
data_detect_lower.reserve(input_rows_count);
auto res_detect_upper = ColumnFloat64::create();
auto & data_detect_upper = res_detect_upper->getData();
data_detect_upper.reserve(input_rows_count);
/// Minimal Detectable Effect
const Float64 mde = col_mde.getFloat64(0);
/// Sufficient statistical power to detect a treatment effect
const Float64 power = col_power.getFloat64(0);
/// Significance level
const Float64 alpha = col_alpha.getFloat64(0);
boost::math::normal_distribution<> nd(0.0, 1.0);
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
/// Proportion of control-metric
Float64 p1;
if constexpr (const_p1)
{
p1 = col_p1[0];
}
else if constexpr (!const_p1)
{
p1 = col_p1[row_num];
}
if (!std::isfinite(p1) || !isBetweenZeroAndOne(mde) || !isBetweenZeroAndOne(power) || !isBetweenZeroAndOne(alpha))
{
data_min_sample_size.emplace_back(std::numeric_limits<Float64>::quiet_NaN());
data_detect_lower.emplace_back(std::numeric_limits<Float64>::quiet_NaN());
data_detect_upper.emplace_back(std::numeric_limits<Float64>::quiet_NaN());
continue;
}
Float64 q1 = 1.0 - p1;
Float64 p2 = p1 + mde;
Float64 q2 = 1.0 - p2;
Float64 p_bar = (p1 + p2) / 2.0;
Float64 q_bar = 1.0 - p_bar;
using namespace boost::math;
/// https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a
/// \frac{(Z_{1-alpha/2} * \sqrt{2*\bar{p}*\bar{q}} + Z_{power} * \sqrt{p1*q1+p2*q2})^{2}}{\Delta^{2}}
Float64 min_sample_size
= std::pow(
quantile(nd, 1.0 - alpha / 2.0) * std::sqrt(2.0 * p_bar * q_bar) + quantile(nd, power) * std::sqrt(p1 * q1 + p2 * q2),
2)
/ std::pow(mde, 2);
data_min_sample_size.emplace_back(min_sample_size);
data_detect_lower.emplace_back(p1 - mde);
data_detect_upper.emplace_back(p1 + mde);
}
return ColumnTuple::create(Columns{std::move(res_min_sample_size), std::move(res_detect_lower), std::move(res_detect_upper)});
}
};
void registerFunctionMinSampleSize(FunctionFactory & factory)
{
factory.registerFunction<FunctionMinSampleSize<ContinousImpl>>();
factory.registerFunction<FunctionMinSampleSize<ConversionImpl>>();
}
}
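A hedged usage sketch for the two new functions (all numbers below are arbitrary illustrative values): minSampleSizeContinous takes (baseline, sigma, mde, power, alpha), minSampleSizeConversion takes (p1, mde, power, alpha), and both return the tuple (minimum_sample_size, detect_range_lower, detect_range_upper).
-- Continuous metric: baseline mean 112.25, stddev 21.1, MDE 3%, power 0.8, alpha 0.05.
SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.8, 0.05) AS continuous;
-- Conversion metric: baseline conversion 0.12, MDE 0.01, power 0.8, alpha 0.05.
SELECT minSampleSizeConversion(0.12, 0.01, 0.8, 0.05) AS conversion;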

View File

@ -56,6 +56,7 @@ void registerFunctionTid(FunctionFactory & factory);
void registerFunctionLogTrace(FunctionFactory & factory);
void registerFunctionsTimeWindow(FunctionFactory &);
void registerFunctionToBool(FunctionFactory &);
void registerFunctionMinSampleSize(FunctionFactory &);
#if USE_SSL
void registerFunctionEncrypt(FunctionFactory & factory);
@ -118,6 +119,7 @@ void registerFunctions()
registerFunctionsSnowflake(factory);
registerFunctionsTimeWindow(factory);
registerFunctionToBool(factory);
registerFunctionMinSampleSize(factory);
#if USE_SSL
registerFunctionEncrypt(factory);

View File

@ -52,6 +52,11 @@ void registerFunctionH3HexAreaKm2(FunctionFactory &);
void registerFunctionH3CellAreaM2(FunctionFactory &);
void registerFunctionH3CellAreaRads2(FunctionFactory &);
void registerFunctionH3NumHexagons(FunctionFactory &);
void registerFunctionH3PointDistM(FunctionFactory &);
void registerFunctionH3PointDistKm(FunctionFactory &);
void registerFunctionH3PointDistRads(FunctionFactory &);
void registerFunctionH3GetRes0Indexes(FunctionFactory &);
void registerFunctionH3GetPentagonIndexes(FunctionFactory &);
#endif
@ -118,6 +123,11 @@ void registerFunctionsGeo(FunctionFactory & factory)
registerFunctionH3CellAreaM2(factory);
registerFunctionH3CellAreaRads2(factory);
registerFunctionH3NumHexagons(factory);
registerFunctionH3PointDistM(factory);
registerFunctionH3PointDistKm(factory);
registerFunctionH3PointDistRads(factory);
registerFunctionH3GetRes0Indexes(factory);
registerFunctionH3GetPentagonIndexes(factory);
#endif
#if USE_S2_GEOMETRY

View File

@ -1,231 +1,225 @@
#include <Common/typeid_cast.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/castTypeToEither.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Interpreters/castColumn.h>
#include <boost/math/distributions/normal.hpp>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
class FunctionTwoSampleProportionsZTest : public IFunction
{
public:
static constexpr auto POOLED = "pooled";
static constexpr auto UNPOOLED = "unpooled";
static constexpr auto name = "proportionsZTest";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTwoSampleProportionsZTest>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 6; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {5}; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
static DataTypePtr getReturnType()
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
auto float_data_type = std::make_shared<DataTypeNumber<Float64>>();
DataTypes types(4, float_data_type);
Strings names{"z_statistic", "p_value", "confidence_interval_low", "confidence_interval_high"};
return std::make_shared<DataTypeTuple>(std::move(types), std::move(names));
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
for (size_t i = 0; i < 4; ++i)
{
if (!isUnsignedInteger(arguments[i].type))
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The {}th Argument of function {} must be an unsigned integer.",
i + 1,
getName());
}
}
if (!isFloat(arguments[4].type))
{
throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The fifth argument {} of function {} should be a float,",
arguments[4].type->getName(),
getName()};
}
/// There is an additional check for constancy in ExecuteImpl
if (!isString(arguments[5].type) || !arguments[5].column)
{
throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The sixth argument {} of function {} should be a constant string",
arguments[5].type->getName(),
getName()};
}
return getReturnType();
}
class FunctionTwoSampleProportionsZTest : public IFunction
ColumnPtr executeImpl(const ColumnsWithTypeAndName & const_arguments, const DataTypePtr &, size_t input_rows_count) const override
{
public:
static constexpr auto POOLED = "pooled";
static constexpr auto UNPOOLED = "unpooled";
auto arguments = const_arguments;
/// Only the last argument has to be constant
for (size_t i = 0; i < 5; ++i)
arguments[i].column = arguments[i].column->convertToFullColumnIfConst();
static constexpr auto name = "proportionsZTest";
static const auto uint64_data_type = std::make_shared<DataTypeNumber<UInt64>>();
static FunctionPtr create(ContextPtr)
auto column_successes_x = castColumnAccurate(arguments[0], uint64_data_type);
const auto & data_successes_x = checkAndGetColumn<ColumnVector<UInt64>>(column_successes_x.get())->getData();
auto column_successes_y = castColumnAccurate(arguments[1], uint64_data_type);
const auto & data_successes_y = checkAndGetColumn<ColumnVector<UInt64>>(column_successes_y.get())->getData();
auto column_trials_x = castColumnAccurate(arguments[2], uint64_data_type);
const auto & data_trials_x = checkAndGetColumn<ColumnVector<UInt64>>(column_trials_x.get())->getData();
auto column_trials_y = castColumnAccurate(arguments[3], uint64_data_type);
const auto & data_trials_y = checkAndGetColumn<ColumnVector<UInt64>>(column_trials_y.get())->getData();
static const auto float64_data_type = std::make_shared<DataTypeNumber<Float64>>();
auto column_confidence_level = castColumnAccurate(arguments[4], float64_data_type);
const auto & data_confidence_level = checkAndGetColumn<ColumnVector<Float64>>(column_confidence_level.get())->getData();
String usevar = checkAndGetColumnConst<ColumnString>(arguments[5].column.get())->getValue<String>();
if (usevar != UNPOOLED && usevar != POOLED)
throw Exception{ErrorCodes::BAD_ARGUMENTS,
"The sixth argument {} of function {} must be equal to `pooled` or `unpooled`",
arguments[5].type->getName(),
getName()};
const bool is_unpooled = (usevar == UNPOOLED);
auto res_z_statistic = ColumnFloat64::create();
auto & data_z_statistic = res_z_statistic->getData();
data_z_statistic.reserve(input_rows_count);
auto res_p_value = ColumnFloat64::create();
auto & data_p_value = res_p_value->getData();
data_p_value.reserve(input_rows_count);
auto res_ci_lower = ColumnFloat64::create();
auto & data_ci_lower = res_ci_lower->getData();
data_ci_lower.reserve(input_rows_count);
auto res_ci_upper = ColumnFloat64::create();
auto & data_ci_upper = res_ci_upper->getData();
data_ci_upper.reserve(input_rows_count);
auto insert_values_into_result = [&data_z_statistic, &data_p_value, &data_ci_lower, &data_ci_upper](
Float64 z_stat, Float64 p_value, Float64 lower, Float64 upper)
{
return std::make_shared<FunctionTwoSampleProportionsZTest>();
}
data_z_statistic.emplace_back(z_stat);
data_p_value.emplace_back(p_value);
data_ci_lower.emplace_back(lower);
data_ci_upper.emplace_back(upper);
};
String getName() const override
static constexpr Float64 nan = std::numeric_limits<Float64>::quiet_NaN();
boost::math::normal_distribution<> nd(0.0, 1.0);
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
return name;
}
const UInt64 successes_x = data_successes_x[row_num];
const UInt64 successes_y = data_successes_y[row_num];
const UInt64 trials_x = data_trials_x[row_num];
const UInt64 trials_y = data_trials_y[row_num];
const Float64 confidence_level = data_confidence_level[row_num];
size_t getNumberOfArguments() const override { return 6; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {5}; }
const Float64 props_x = static_cast<Float64>(successes_x) / trials_x;
const Float64 props_y = static_cast<Float64>(successes_y) / trials_y;
const Float64 diff = props_x - props_y;
const UInt64 trials_total = trials_x + trials_y;
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
static DataTypePtr getReturnType()
{
auto float_data_type = std::make_shared<DataTypeNumber<Float64>>();
DataTypes types(4, float_data_type);
Strings names
if (successes_x == 0 || successes_y == 0 || successes_x > trials_x || successes_y > trials_y || trials_total == 0
|| !std::isfinite(confidence_level) || confidence_level < 0.0 || confidence_level > 1.0)
{
"z_statistic",
"p_value",
"confidence_interval_low",
"confidence_interval_high"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
for (size_t i = 0; i < 4; ++i)
{
if (!isUnsignedInteger(arguments[i].type))
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The {}th Argument of function {} must be an unsigned integer.", i + 1, getName());
}
insert_values_into_result(nan, nan, nan, nan);
continue;
}
if (!isFloat(arguments[4].type))
Float64 se = std::sqrt(props_x * (1.0 - props_x) / trials_x + props_y * (1.0 - props_y) / trials_y);
/// z-statistics
/// z = \frac{ \bar{p_{1}} - \bar{p_{2}} }{ \sqrt{ \frac{ \bar{p_{1}} \left ( 1 - \bar{p_{1}} \right ) }{ n_{1} } \frac{ \bar{p_{2}} \left ( 1 - \bar{p_{2}} \right ) }{ n_{2} } } }
Float64 zstat;
if (is_unpooled)
{
throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The fifth argument {} of function {} should be a float,", arguments[4].type->getName(), getName()};
zstat = (props_x - props_y) / se;
}
else
{
UInt64 successes_total = successes_x + successes_y;
Float64 p_pooled = static_cast<Float64>(successes_total) / trials_total;
Float64 trials_fact = 1.0 / trials_x + 1.0 / trials_y;
zstat = diff / std::sqrt(p_pooled * (1.0 - p_pooled) * trials_fact);
}
/// There is an additional check for constancy in ExecuteImpl
if (!isString(arguments[5].type) || !arguments[5].column)
if (!std::isfinite(zstat))
{
throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The sixth argument {} of function {} should be a constant string", arguments[5].type->getName(), getName()};
insert_values_into_result(nan, nan, nan, nan);
continue;
}
return getReturnType();
// pvalue
Float64 pvalue = 0;
Float64 one_side = 1 - boost::math::cdf(nd, std::abs(zstat));
pvalue = one_side * 2;
// Confidence intervals
Float64 d = props_x - props_y;
Float64 z = -boost::math::quantile(nd, (1.0 - confidence_level) / 2.0);
Float64 dist = z * se;
Float64 ci_low = d - dist;
Float64 ci_high = d + dist;
insert_values_into_result(zstat, pvalue, ci_low, ci_high);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & const_arguments, const DataTypePtr &, size_t input_rows_count) const override
{
auto arguments = const_arguments;
/// Only the last argument has to be constant
for (size_t i = 0; i < 5; ++i)
arguments[i].column = arguments[i].column->convertToFullColumnIfConst();
static const auto uint64_data_type = std::make_shared<DataTypeNumber<UInt64>>();
auto column_successes_x = castColumnAccurate(arguments[0], uint64_data_type);
const auto & data_successes_x = checkAndGetColumn<ColumnVector<UInt64>>(column_successes_x.get())->getData();
auto column_successes_y = castColumnAccurate(arguments[1], uint64_data_type);
const auto & data_successes_y = checkAndGetColumn<ColumnVector<UInt64>>(column_successes_y.get())->getData();
auto column_trials_x = castColumnAccurate(arguments[2], uint64_data_type);
const auto & data_trials_x = checkAndGetColumn<ColumnVector<UInt64>>(column_trials_x.get())->getData();
auto column_trials_y = castColumnAccurate(arguments[3], uint64_data_type);
const auto & data_trials_y = checkAndGetColumn<ColumnVector<UInt64>>(column_trials_y.get())->getData();
static const auto float64_data_type = std::make_shared<DataTypeNumber<Float64>>();
auto column_confidence_level = castColumnAccurate(arguments[4], float64_data_type);
const auto & data_confidence_level = checkAndGetColumn<ColumnVector<Float64>>(column_confidence_level.get())->getData();
String usevar = checkAndGetColumnConst<ColumnString>(arguments[5].column.get())->getValue<String>();
if (usevar != UNPOOLED && usevar != POOLED)
throw Exception{ErrorCodes::BAD_ARGUMENTS,
"The sixth argument {} of function {} must be equal to `pooled` or `unpooled`", arguments[5].type->getName(), getName()};
const bool is_unpooled = (usevar == UNPOOLED);
auto res_z_statistic = ColumnFloat64::create();
auto & data_z_statistic = res_z_statistic->getData();
data_z_statistic.reserve(input_rows_count);
auto res_p_value = ColumnFloat64::create();
auto & data_p_value = res_p_value->getData();
data_p_value.reserve(input_rows_count);
auto res_ci_lower = ColumnFloat64::create();
auto & data_ci_lower = res_ci_lower->getData();
data_ci_lower.reserve(input_rows_count);
auto res_ci_upper = ColumnFloat64::create();
auto & data_ci_upper = res_ci_upper->getData();
data_ci_upper.reserve(input_rows_count);
auto insert_values_into_result = [&data_z_statistic, &data_p_value, &data_ci_lower, &data_ci_upper](Float64 z_stat, Float64 p_value, Float64 lower, Float64 upper)
{
data_z_statistic.emplace_back(z_stat);
data_p_value.emplace_back(p_value);
data_ci_lower.emplace_back(lower);
data_ci_upper.emplace_back(upper);
};
static constexpr Float64 nan = std::numeric_limits<Float64>::quiet_NaN();
boost::math::normal_distribution<> nd(0.0, 1.0);
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
const UInt64 successes_x = data_successes_x[row_num];
const UInt64 successes_y = data_successes_y[row_num];
const UInt64 trials_x = data_trials_x[row_num];
const UInt64 trials_y = data_trials_y[row_num];
const Float64 confidence_level = data_confidence_level[row_num];
const Float64 props_x = static_cast<Float64>(successes_x) / trials_x;
const Float64 props_y = static_cast<Float64>(successes_y) / trials_y;
const Float64 diff = props_x - props_y;
const UInt64 trials_total = trials_x + trials_y;
if (successes_x == 0 || successes_y == 0
|| successes_x > trials_x || successes_y > trials_y
|| trials_total == 0
|| !std::isfinite(confidence_level) || confidence_level < 0.0 || confidence_level > 1.0)
{
insert_values_into_result(nan, nan, nan, nan);
continue;
}
Float64 se = std::sqrt(props_x * (1.0 - props_x) / trials_x + props_y * (1.0 - props_y) / trials_y);
/// z-statistics
/// z = \frac{ \bar{p_{1}} - \bar{p_{2}} }{ \sqrt{ \frac{ \bar{p_{1}} \left ( 1 - \bar{p_{1}} \right ) }{ n_{1} } \frac{ \bar{p_{2}} \left ( 1 - \bar{p_{2}} \right ) }{ n_{2} } } }
Float64 zstat;
if (is_unpooled)
{
zstat = (props_x - props_y) / se;
}
else
{
UInt64 successes_total = successes_x + successes_y;
Float64 p_pooled = static_cast<Float64>(successes_total) / trials_total;
Float64 trials_fact = 1.0 / trials_x + 1.0 / trials_y;
zstat = diff / std::sqrt(p_pooled * (1.0 - p_pooled) * trials_fact);
}
if (!std::isfinite(zstat))
{
insert_values_into_result(nan, nan, nan, nan);
continue;
}
// pvalue
Float64 pvalue = 0;
Float64 one_side = 1 - boost::math::cdf(nd, std::abs(zstat));
pvalue = one_side * 2;
// Confidence intervals
Float64 d = props_x - props_y;
Float64 z = -boost::math::quantile(nd, (1.0 - confidence_level) / 2.0);
Float64 dist = z * se;
Float64 ci_low = d - dist;
Float64 ci_high = d + dist;
insert_values_into_result(zstat, pvalue, ci_low, ci_high);
}
return ColumnTuple::create(Columns{std::move(res_z_statistic), std::move(res_p_value), std::move(res_ci_lower), std::move(res_ci_upper)});
}
};
void registerFunctionZTest(FunctionFactory & factory)
{
factory.registerFunction<FunctionTwoSampleProportionsZTest>();
return ColumnTuple::create(
Columns{std::move(res_z_statistic), std::move(res_p_value), std::move(res_ci_lower), std::move(res_ci_upper)});
}
};
void registerFunctionZTest(FunctionFactory & factory)
{
factory.registerFunction<FunctionTwoSampleProportionsZTest>();
}
}
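A hedged call sketch for proportionsZTest: the arguments are (successes_x, successes_y, trials_x, trials_y, confidence_level, usevar), where usevar is the constant string 'pooled' or 'unpooled', and the result is the tuple (z_statistic, p_value, confidence_interval_low, confidence_interval_high). The counts below are arbitrary.
SELECT proportionsZTest(120, 100, 1000, 1000, 0.95, 'unpooled') AS ztest;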

View File

@ -113,6 +113,23 @@ namespace JoinStuff
}
}
template <bool use_flags, bool multiple_disjuncts>
void JoinUsedFlags::setUsed(const Block * block, size_t row_num, size_t offset)
{
if constexpr (!use_flags)
return;
/// Could be set simultaneously from different threads.
if constexpr (multiple_disjuncts)
{
flags[block][row_num].store(true, std::memory_order_relaxed);
}
else
{
flags[nullptr][offset].store(true, std::memory_order_relaxed);
}
}
template <bool use_flags, bool multiple_disjuncts, typename FindResult>
bool JoinUsedFlags::getUsed(const FindResult & f)
{
@ -302,7 +319,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
throw Exception("ASOF join needs at least one equi-join column", ErrorCodes::SYNTAX_ERROR);
size_t asof_size;
asof_type = AsofRowRefs::getTypeSize(*key_columns.back(), asof_size);
asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size);
key_columns.pop_back();
/// this is going to set up the appropriate hash table for the direct lookup part of the join
@ -611,8 +628,8 @@ namespace
TypeIndex asof_type = *join.getAsofType();
if (emplace_result.isInserted())
time_series_map = new (time_series_map) typename Map::mapped_type(asof_type);
time_series_map->insert(asof_type, asof_column, stored_block, i);
time_series_map = new (time_series_map) typename Map::mapped_type(createAsofRowRef(asof_type, join.getAsofInequality()));
(*time_series_map)->insert(asof_column, stored_block, i);
}
};
@ -895,8 +912,6 @@ public:
bool is_join_get_)
: join_on_keys(join_on_keys_)
, rows_to_add(block.rows())
, asof_type(join.getAsofType())
, asof_inequality(join.getAsofInequality())
, is_join_get(is_join_get_)
{
size_t num_columns_to_add = block_with_columns_to_add.columns();
@ -978,8 +993,6 @@ public:
}
}
TypeIndex asofType() const { return *asof_type; }
ASOF::Inequality asofInequality() const { return asof_inequality; }
const IColumn & leftAsofKey() const { return *left_asof_key; }
std::vector<JoinOnKeyColumns> join_on_keys;
@ -994,8 +1007,6 @@ private:
std::vector<size_t> right_indexes;
size_t lazy_defaults_count = 0;
/// for ASOF
std::optional<TypeIndex> asof_type;
ASOF::Inequality asof_inequality;
const IColumn * left_asof_key = nullptr;
bool is_join_get;
@ -1224,19 +1235,18 @@ NO_INLINE IColumn::Filter joinRightColumns(
auto & mapped = find_result.getMapped();
if constexpr (jf.is_asof_join)
{
TypeIndex asof_type = added_columns.asofType();
ASOF::Inequality asof_inequality = added_columns.asofInequality();
const IColumn & left_asof_key = added_columns.leftAsofKey();
if (const RowRef * found = mapped.findAsof(asof_type, asof_inequality, left_asof_key, i))
auto [block, row_num] = mapped->findAsof(left_asof_key, i);
if (block)
{
setUsed<need_filter>(filter, i);
if constexpr (multiple_disjuncts)
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(FindResultImpl<const RowRef, false>(found, true, 0));
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(block, row_num, 0);
else
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(find_result);
added_columns.appendFromBlock<jf.add_missing>(*found->block, found->row_num);
added_columns.appendFromBlock<jf.add_missing>(*block, row_num);
}
else
addNotFoundRow<jf.add_missing, jf.need_replication>(added_columns, current_offset);

View File

@ -62,6 +62,9 @@ public:
template <bool use_flags, bool multiple_disjuncts, typename T>
void setUsed(const T & f);
template <bool use_flags, bool multiple_disjunct>
void setUsed(const Block * block, size_t row_num, size_t offset);
template <bool use_flags, bool multiple_disjuncts, typename T>
bool getUsed(const T & f);

View File

@ -1,12 +1,9 @@
#include <Interpreters/RowRefs.h>
#include <Core/Block.h>
#include <base/types.h>
#include <Common/typeid_cast.h>
#include <Common/ColumnsHashing.h>
#include <AggregateFunctions/Helpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/IDataType.h>
#include <base/types.h>
namespace DB
@ -15,6 +12,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_TYPE_OF_FIELD;
extern const int LOGICAL_ERROR;
}
namespace
@ -22,145 +20,207 @@ namespace
/// maps enum values to types
template <typename F>
void callWithType(TypeIndex which, F && f)
void callWithType(TypeIndex type, F && f)
{
switch (which)
{
case TypeIndex::UInt8: return f(UInt8());
case TypeIndex::UInt16: return f(UInt16());
case TypeIndex::UInt32: return f(UInt32());
case TypeIndex::UInt64: return f(UInt64());
case TypeIndex::Int8: return f(Int8());
case TypeIndex::Int16: return f(Int16());
case TypeIndex::Int32: return f(Int32());
case TypeIndex::Int64: return f(Int64());
case TypeIndex::Float32: return f(Float32());
case TypeIndex::Float64: return f(Float64());
case TypeIndex::Decimal32: return f(Decimal32());
case TypeIndex::Decimal64: return f(Decimal64());
case TypeIndex::Decimal128: return f(Decimal128());
case TypeIndex::DateTime64: return f(DateTime64());
default:
break;
}
WhichDataType which(type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return f(TYPE());
FOR_NUMERIC_TYPES(DISPATCH)
DISPATCH(Decimal32)
DISPATCH(Decimal64)
DISPATCH(Decimal128)
DISPATCH(Decimal256)
DISPATCH(DateTime64)
#undef DISPATCH
__builtin_unreachable();
}
}
AsofRowRefs::AsofRowRefs(TypeIndex type)
template <typename TKey, ASOF::Inequality inequality>
class SortedLookupVector : public SortedLookupVectorBase
{
auto call = [&](const auto & t)
struct Entry
{
using T = std::decay_t<decltype(t)>;
using LookupType = typename Entry<T>::LookupType;
lookups = std::make_unique<LookupType>();
/// We don't store a RowRef and instead keep its members separately (and return a tuple) to reduce the memory usage.
/// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater
decltype(RowRef::block) block;
decltype(RowRef::row_num) row_num;
TKey asof_value;
Entry() = delete;
Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { }
bool operator<(const Entry & other) const { return asof_value < other.asof_value; }
};
callWithType(type, call);
}
void AsofRowRefs::insert(TypeIndex type, const IColumn & asof_column, const Block * block, size_t row_num)
{
auto call = [&](const auto & t)
struct GreaterEntryOperator
{
using T = std::decay_t<decltype(t)>;
using LookupPtr = typename Entry<T>::LookupPtr;
using ColumnType = ColumnVectorOrDecimal<T>;
const auto & column = typeid_cast<const ColumnType &>(asof_column);
T key = column.getElement(row_num);
auto entry = Entry<T>(key, RowRef(block, row_num));
std::get<LookupPtr>(lookups)->insert(entry);
bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; }
};
callWithType(type, call);
}
const RowRef * AsofRowRefs::findAsof(TypeIndex type, ASOF::Inequality inequality, const IColumn & asof_column, size_t row_num) const
{
const RowRef * out = nullptr;
public:
using Base = std::vector<Entry>;
using Keys = std::vector<TKey>;
static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
bool ascending = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::LessOrEquals);
bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
auto call = [&](const auto & t)
void insert(const IColumn & asof_column, const Block * block, size_t row_num) override
{
using T = std::decay_t<decltype(t)>;
using EntryType = Entry<T>;
using LookupPtr = typename EntryType::LookupPtr;
using ColumnType = ColumnVectorOrDecimal<TKey>;
const auto & column = assert_cast<const ColumnType &>(asof_column);
TKey k = column.getElement(row_num);
using ColumnType = ColumnVectorOrDecimal<T>;
const auto & column = typeid_cast<const ColumnType &>(asof_column);
T key = column.getElement(row_num);
auto & typed_lookup = std::get<LookupPtr>(lookups);
if (is_strict)
out = typed_lookup->upperBound(EntryType(key), ascending);
else
out = typed_lookup->lowerBound(EntryType(key), ascending);
};
callWithType(type, call);
return out;
}
std::optional<TypeIndex> AsofRowRefs::getTypeSize(const IColumn & asof_column, size_t & size)
{
TypeIndex idx = asof_column.getDataType();
switch (idx)
{
case TypeIndex::UInt8:
size = sizeof(UInt8);
return idx;
case TypeIndex::UInt16:
size = sizeof(UInt16);
return idx;
case TypeIndex::UInt32:
size = sizeof(UInt32);
return idx;
case TypeIndex::UInt64:
size = sizeof(UInt64);
return idx;
case TypeIndex::Int8:
size = sizeof(Int8);
return idx;
case TypeIndex::Int16:
size = sizeof(Int16);
return idx;
case TypeIndex::Int32:
size = sizeof(Int32);
return idx;
case TypeIndex::Int64:
size = sizeof(Int64);
return idx;
//case TypeIndex::Int128:
case TypeIndex::Float32:
size = sizeof(Float32);
return idx;
case TypeIndex::Float64:
size = sizeof(Float64);
return idx;
case TypeIndex::Decimal32:
size = sizeof(Decimal32);
return idx;
case TypeIndex::Decimal64:
size = sizeof(Decimal64);
return idx;
case TypeIndex::Decimal128:
size = sizeof(Decimal128);
return idx;
case TypeIndex::DateTime64:
size = sizeof(DateTime64);
return idx;
default:
break;
assert(!sorted.load(std::memory_order_acquire));
array.emplace_back(k, block, row_num);
}
/// Unrolled version of upper_bound and lower_bound
/// Loosely based on https://academy.realm.io/posts/how-we-beat-cpp-stl-binary-search/
/// In the future it'd be interesting to replace it with a B+Tree layout as described
/// at https://en.algorithmica.org/hpc/data-structures/s-tree/
size_t boundSearch(TKey value)
{
size_t size = array.size();
size_t low = 0;
/// This is a single binary search iteration as a macro to unroll. Takes into account the inequality:
/// isStrict -> Equal values are not requested
/// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals)
#define BOUND_ITERATION \
{ \
size_t half = size / 2; \
size_t other_half = size - half; \
size_t probe = low + half; \
size_t other_low = low + other_half; \
TKey v = array[probe].asof_value; \
size = half; \
if constexpr (isDescending) \
{ \
if constexpr (isStrict) \
low = value <= v ? other_low : low; \
else \
low = value < v ? other_low : low; \
} \
else \
{ \
if constexpr (isStrict) \
low = value >= v ? other_low : low; \
else \
low = value > v ? other_low : low; \
} \
}
while (size >= 8)
{
BOUND_ITERATION
BOUND_ITERATION
BOUND_ITERATION
}
while (size > 0)
{
BOUND_ITERATION
}
#undef BOUND_ITERATION
return low;
}
std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn & asof_column, size_t row_num) override
{
sort();
using ColumnType = ColumnVectorOrDecimal<TKey>;
const auto & column = assert_cast<const ColumnType &>(asof_column);
TKey k = column.getElement(row_num);
size_t pos = boundSearch(k);
if (pos != array.size())
return std::make_tuple(array[pos].block, array[pos].row_num);
return {nullptr, 0};
}
private:
std::atomic<bool> sorted = false;
mutable std::mutex lock;
Base array;
// Double checked locking with SC atomics works in C++
// https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
// The first thread that calls one of the lookup methods sorts the data
// After calling the first lookup method it is no longer allowed to insert any data
// the array becomes immutable
void sort()
{
if (!sorted.load(std::memory_order_acquire))
{
std::lock_guard<std::mutex> l(lock);
if (!sorted.load(std::memory_order_relaxed))
{
if constexpr (isDescending)
::sort(array.begin(), array.end(), GreaterEntryOperator());
else
::sort(array.begin(), array.end());
sorted.store(true, std::memory_order_release);
}
}
}
};
}
AsofRowRefs createAsofRowRef(TypeIndex type, ASOF::Inequality inequality)
{
AsofRowRefs result;
auto call = [&](const auto & t)
{
using T = std::decay_t<decltype(t)>;
switch (inequality)
{
case ASOF::Inequality::LessOrEquals:
result = std::make_unique<SortedLookupVector<T, ASOF::Inequality::LessOrEquals>>();
break;
case ASOF::Inequality::Less:
result = std::make_unique<SortedLookupVector<T, ASOF::Inequality::Less>>();
break;
case ASOF::Inequality::GreaterOrEquals:
result = std::make_unique<SortedLookupVector<T, ASOF::Inequality::GreaterOrEquals>>();
break;
case ASOF::Inequality::Greater:
result = std::make_unique<SortedLookupVector<T, ASOF::Inequality::Greater>>();
break;
default:
throw Exception("Invalid ASOF Join order", ErrorCodes::LOGICAL_ERROR);
}
};
callWithType(type, call);
return result;
}
std::optional<TypeIndex> SortedLookupVectorBase::getTypeSize(const IColumn & asof_column, size_t & size)
{
WhichDataType which(asof_column.getDataType());
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
{ \
size = sizeof(TYPE); \
return asof_column.getDataType(); \
}
FOR_NUMERIC_TYPES(DISPATCH)
DISPATCH(Decimal32)
DISPATCH(Decimal64)
DISPATCH(Decimal128)
DISPATCH(Decimal256)
DISPATCH(DateTime64)
#undef DISPATCH
throw Exception("ASOF join not supported for type: " + std::string(asof_column.getFamilyName()), ErrorCodes::BAD_TYPE_OF_FIELD);
}
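The sorted lookup structure above serves ASOF JOIN; a hedged sketch of the kind of query it backs (table and column names are made up):
-- The inequality in the ON clause (here >=) selects the corresponding SortedLookupVector specialization.
SELECT e.id, e.event_time, q.quote_time, q.price
FROM events AS e
ASOF LEFT JOIN quotes AS q ON e.id = q.id AND e.event_time >= q.quote_time;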

View File

@ -1,16 +1,18 @@
#pragma once
#include <optional>
#include <variant>
#include <algorithm>
#include <cassert>
#include <list>
#include <mutex>
#include <algorithm>
#include <optional>
#include <variant>
#include <base/sort.h>
#include <Common/Arena.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
#include <Interpreters/asof.h>
#include <base/sort.h>
#include <Common/Arena.h>
namespace DB
@ -26,7 +28,7 @@ struct RowRef
const Block * block = nullptr;
SizeT row_num = 0;
RowRef() {} /// NOLINT
RowRef() = default;
RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {}
};
@ -141,123 +143,23 @@ private:
* After calling any of the lookup methods, it is no longer allowed to insert more data as this would invalidate the
* references that can be returned by the lookup methods
*/
template <typename TEntry, typename TKey>
class SortedLookupVector
struct SortedLookupVectorBase
{
public:
using Base = std::vector<TEntry>;
// First stage, insertions into the vector
template <typename U, typename ... TAllocatorParams>
void insert(U && x, TAllocatorParams &&... allocator_params)
{
assert(!sorted.load(std::memory_order_acquire));
array.push_back(std::forward<U>(x), std::forward<TAllocatorParams>(allocator_params)...);
}
const RowRef * upperBound(const TEntry & k, bool ascending)
{
sort(ascending);
auto it = std::upper_bound(array.cbegin(), array.cend(), k, (ascending ? less : greater));
if (it != array.cend())
return &(it->row_ref);
return nullptr;
}
const RowRef * lowerBound(const TEntry & k, bool ascending)
{
sort(ascending);
auto it = std::lower_bound(array.cbegin(), array.cend(), k, (ascending ? less : greater));
if (it != array.cend())
return &(it->row_ref);
return nullptr;
}
private:
std::atomic<bool> sorted = false;
Base array;
mutable std::mutex lock;
static bool less(const TEntry & a, const TEntry & b)
{
return a.asof_value < b.asof_value;
}
static bool greater(const TEntry & a, const TEntry & b)
{
return a.asof_value > b.asof_value;
}
// Double checked locking with SC atomics works in C++
// https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
// The first thread that calls one of the lookup methods sorts the data
// After calling the first lookup method it is no longer allowed to insert any data
// the array becomes immutable
void sort(bool ascending)
{
if (!sorted.load(std::memory_order_acquire))
{
std::lock_guard<std::mutex> l(lock);
if (!sorted.load(std::memory_order_relaxed))
{
if (!array.empty())
::sort(array.begin(), array.end(), (ascending ? less : greater));
sorted.store(true, std::memory_order_release);
}
}
}
};
class AsofRowRefs
{
public:
template <typename T>
struct Entry
{
using LookupType = SortedLookupVector<Entry<T>, T>;
using LookupPtr = std::unique_ptr<LookupType>;
T asof_value;
RowRef row_ref;
explicit Entry(T v) : asof_value(v) {}
Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {}
};
using Lookups = std::variant<
Entry<UInt8>::LookupPtr,
Entry<UInt16>::LookupPtr,
Entry<UInt32>::LookupPtr,
Entry<UInt64>::LookupPtr,
Entry<Int8>::LookupPtr,
Entry<Int16>::LookupPtr,
Entry<Int32>::LookupPtr,
Entry<Int64>::LookupPtr,
Entry<Float32>::LookupPtr,
Entry<Float64>::LookupPtr,
Entry<Decimal32>::LookupPtr,
Entry<Decimal64>::LookupPtr,
Entry<Decimal128>::LookupPtr,
Entry<DateTime64>::LookupPtr>;
AsofRowRefs() = default;
explicit AsofRowRefs(TypeIndex t);
SortedLookupVectorBase() = default;
virtual ~SortedLookupVectorBase() { }
static std::optional<TypeIndex> getTypeSize(const IColumn & asof_column, size_t & type_size);
// This will be synchronized by the rwlock mutex in Join.h
void insert(TypeIndex type, const IColumn & asof_column, const Block * block, size_t row_num);
virtual void insert(const IColumn &, const Block *, size_t) = 0;
// This will internally synchronize
const RowRef * findAsof(TypeIndex type, ASOF::Inequality inequality, const IColumn & asof_column, size_t row_num) const;
private:
// Lookups can be stored in a HashTable because it is memmovable
// A std::variant contains a currently active type id (memmovable), together with a union of the types
// The types are all std::unique_ptr, which contains a single pointer, which is memmovable.
// Source: https://github.com/ClickHouse/ClickHouse/issues/4906
Lookups lookups;
// This needs to be synchronized internally
virtual std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn &, size_t) = 0;
};
// It only contains a std::unique_ptr which is memmovable.
// Source: https://github.com/ClickHouse/ClickHouse/issues/4906
using AsofRowRefs = std::unique_ptr<SortedLookupVectorBase>;
AsofRowRefs createAsofRowRef(TypeIndex type, ASOF::Inequality inequality);
}

View File

@ -0,0 +1,97 @@
#include "ProtobufListInputFormat.h"
#if USE_PROTOBUF
# include <Core/Block.h>
# include <Formats/FormatFactory.h>
# include <Formats/ProtobufReader.h>
# include <Formats/ProtobufSchemas.h>
# include <Formats/ProtobufSerializer.h>
namespace DB
{
ProtobufListInputFormat::ProtobufListInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_)
: IRowInputFormat(header_, in_, params_)
, reader(std::make_unique<ProtobufReader>(in_))
, serializer(ProtobufSerializer::create(
header_.getNames(),
header_.getDataTypes(),
missing_column_indices,
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes),
/* with_length_delimiter = */ true,
/* with_envelope = */ true,
*reader))
{
}
bool ProtobufListInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension)
{
if (reader->eof())
{
reader->endMessage(/*ignore_errors =*/ false);
return false;
}
size_t row_num = columns.empty() ? 0 : columns[0]->size();
if (!row_num)
serializer->setColumns(columns.data(), columns.size());
serializer->readRow(row_num);
row_read_extension.read_columns.clear();
row_read_extension.read_columns.resize(columns.size(), true);
for (size_t column_idx : missing_column_indices)
row_read_extension.read_columns[column_idx] = false;
return true;
}
ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format_settings)
: schema_info(
format_settings.schema.format_schema,
"Protobuf",
true,
format_settings.schema.is_server,
format_settings.schema.format_schema_path)
{
}
NamesAndTypesList ProtobufListSchemaReader::readSchema()
{
const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes);
return protobufSchemaToCHSchema(message_descriptor);
}
void registerInputFormatProtobufList(FormatFactory & factory)
{
factory.registerInputFormat(
"ProtobufList",
[](ReadBuffer &buf,
const Block & sample,
RowInputFormatParams params,
const FormatSettings & settings)
{
return std::make_shared<ProtobufListInputFormat>(buf, sample, std::move(params), FormatSchemaInfo(settings, "Protobuf", true));
});
factory.markFormatAsColumnOriented("ProtobufList");
}
void registerProtobufListSchemaReader(FormatFactory & factory)
{
factory.registerExternalSchemaReader("ProtobufList", [](const FormatSettings & settings)
{
return std::make_shared<ProtobufListSchemaReader>(settings);
});
}
}
#else
namespace DB
{
class FormatFactory;
void registerInputFormatProtobufList(FormatFactory &) {}
void registerProtobufListSchemaReader(FormatFactory &) {}
}
#endif
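A hedged sketch of feeding data through the new ProtobufList input format (the schema file name and the envelope message name are placeholders):
INSERT INTO table_name FORMAT ProtobufList SETTINGS format_schema = 'message:Envelope'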

View File

@ -0,0 +1,52 @@
#pragma once
#include "config_formats.h"
#if USE_PROTOBUF
# include <Formats/FormatSchemaInfo.h>
# include <Processors/Formats/IRowInputFormat.h>
# include <Processors/Formats/ISchemaReader.h>
namespace DB
{
class Block;
class ProtobufReader;
class ProtobufSerializer;
class ReadBuffer;
/** Stream designed to deserialize data from the google protobuf format.
* One nested Protobuf message is parsed as one row of data.
*
* Parsing of the protobuf format requires the 'format_schema' setting to be set, e.g.
* INSERT INTO table FORMAT ProtobufList SETTINGS format_schema = 'schema:Message'
* where schema is the name of the "schema.proto" file specifying the protobuf schema.
*/
class ProtobufListInputFormat final : public IRowInputFormat
{
public:
ProtobufListInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_);
String getName() const override { return "ProtobufListInputFormat"; }
private:
bool readRow(MutableColumns & columns, RowReadExtension & row_read_extension) override;
std::unique_ptr<ProtobufReader> reader;
std::vector<size_t> missing_column_indices;
std::unique_ptr<ProtobufSerializer> serializer;
};
class ProtobufListSchemaReader : public IExternalSchemaReader
{
public:
explicit ProtobufListSchemaReader(const FormatSettings & format_settings);
NamesAndTypesList readSchema() override;
private:
const FormatSchemaInfo schema_info;
};
}
#endif

View File

@ -0,0 +1,68 @@
#include "ProtobufListOutputFormat.h"
#if USE_PROTOBUF
# include <Formats/FormatFactory.h>
# include <Formats/FormatSchemaInfo.h>
# include <Formats/ProtobufWriter.h>
# include <Formats/ProtobufSerializer.h>
# include <Formats/ProtobufSchemas.h>
namespace DB
{
ProtobufListOutputFormat::ProtobufListOutputFormat(
WriteBuffer & out_,
const Block & header_,
const RowOutputFormatParams & params_,
const FormatSchemaInfo & schema_info_)
: IRowOutputFormat(header_, out_, params_)
, writer(std::make_unique<ProtobufWriter>(out))
, serializer(ProtobufSerializer::create(
header_.getNames(),
header_.getDataTypes(),
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes),
/* with_length_delimiter = */ true,
/* with_envelope = */ true,
*writer))
{
}
void ProtobufListOutputFormat::write(const Columns & columns, size_t row_num)
{
if (row_num == 0)
serializer->setColumns(columns.data(), columns.size());
serializer->writeRow(row_num);
}
void ProtobufListOutputFormat::finalizeImpl()
{
serializer->finalizeWrite();
}
void registerOutputFormatProtobufList(FormatFactory & factory)
{
factory.registerOutputFormat(
"ProtobufList",
[](WriteBuffer & buf,
const Block & header,
const RowOutputFormatParams & params,
const FormatSettings & settings)
{
return std::make_shared<ProtobufListOutputFormat>(
buf, header, params,
FormatSchemaInfo(settings, "Protobuf", true));
});
}
}
#else
namespace DB
{
class FormatFactory;
void registerOutputFormatProtobufList(FormatFactory &) {}
}
#endif

View File

@ -0,0 +1,48 @@
#pragma once
#include "config_formats.h"
#if USE_PROTOBUF
# include <Processors/Formats/IRowOutputFormat.h>
namespace DB
{
class FormatSchemaInfo;
class ProtobufWriter;
class ProtobufSerializer;
/** Stream designed to serialize data in the google protobuf format.
* Each row is written as a separate nested message, and all rows are enclosed by a single
* top-level envelope message.
*
* Serializing in the protobuf format requires the 'format_schema' setting to be set, e.g.
* SELECT * FROM table FORMAT ProtobufList SETTINGS format_schema = 'schema:Message'
* where schema is the name of the "schema.proto" file specifying the protobuf schema.
*/
// class ProtobufListOutputFormat final : public IOutputFormat
class ProtobufListOutputFormat final : public IRowOutputFormat
{
public:
ProtobufListOutputFormat(
WriteBuffer & out_,
const Block & header_,
const RowOutputFormatParams & params_,
const FormatSchemaInfo & schema_info_);
String getName() const override { return "ProtobufListOutputFormat"; }
String getContentType() const override { return "application/octet-stream"; }
private:
void write(const Columns & columns, size_t row_num) override;
void writeField(const IColumn &, const ISerialization &, size_t) override {}
void finalizeImpl() override;
std::unique_ptr<ProtobufWriter> writer;
std::unique_ptr<ProtobufSerializer> serializer;
};
}
#endif
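And the corresponding hedged output-side sketch (again, the schema file and envelope message names are placeholders):
SELECT * FROM table_name FORMAT ProtobufList SETTINGS format_schema = 'message:Envelope'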

View File

@ -3,16 +3,13 @@
#if USE_PROTOBUF
# include <Core/Block.h>
# include <Formats/FormatFactory.h>
# include <Formats/FormatSchemaInfo.h>
# include <Formats/ProtobufReader.h>
# include <Formats/ProtobufSchemas.h>
# include <Formats/ProtobufSerializer.h>
# include <Interpreters/Context.h>
# include <base/range.h>
namespace DB
{
ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_)
: IRowInputFormat(header_, in_, params_)
, reader(std::make_unique<ProtobufReader>(in_))
@ -20,14 +17,13 @@ ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & h
header_.getNames(),
header_.getDataTypes(),
missing_column_indices,
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No),
with_length_delimiter_,
/* with_envelope = */ false,
*reader))
{
}
ProtobufRowInputFormat::~ProtobufRowInputFormat() = default;
bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension)
{
if (reader->eof())
@ -85,7 +81,7 @@ ProtobufSchemaReader::ProtobufSchemaReader(const FormatSettings & format_setting
NamesAndTypesList ProtobufSchemaReader::readSchema()
{
const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info);
const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::No);
return protobufSchemaToCHSchema(message_descriptor);
}
@ -111,7 +107,6 @@ namespace DB
{
class FormatFactory;
void registerInputFormatProtobuf(FormatFactory &) {}
void registerProtobufSchemaReader(FormatFactory &) {}
}

View File

@ -3,17 +3,16 @@
#include "config_formats.h"
#if USE_PROTOBUF
# include <Formats/FormatSchemaInfo.h>
# include <Processors/Formats/IRowInputFormat.h>
# include <Processors/Formats/ISchemaReader.h>
# include <Processors/Formats/IRowInputFormat.h>
# include <Processors/Formats/ISchemaReader.h>
# include <Formats/FormatSchemaInfo.h>
namespace DB
{
class Block;
class FormatSchemaInfo;
class ProtobufReader;
class ProtobufSerializer;
class ReadBuffer;
/** Stream designed to deserialize data from the google protobuf format.
* One Protobuf message is parsed as one row of data.
@ -30,12 +29,11 @@ class ProtobufRowInputFormat final : public IRowInputFormat
{
public:
ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_);
~ProtobufRowInputFormat() override;
String getName() const override { return "ProtobufRowInputFormat"; }
private:
bool readRow(MutableColumns & columns, RowReadExtension &) override;
bool readRow(MutableColumns & columns, RowReadExtension & row_read_extension) override;
bool allowSyncAfterError() const override;
void syncAfterError() override;
@ -52,7 +50,7 @@ public:
NamesAndTypesList readSchema() override;
private:
FormatSchemaInfo schema_info;
const FormatSchemaInfo schema_info;
};
}

View File

@ -4,12 +4,12 @@
# include <Formats/FormatFactory.h>
# include <Core/Block.h>
# include <Formats/FormatSchemaInfo.h>
# include <Formats/FormatSettings.h>
# include <Formats/ProtobufSchemas.h>
# include <Formats/ProtobufSerializer.h>
# include <Formats/ProtobufWriter.h>
# include <google/protobuf/descriptor.h>
namespace DB
{
namespace ErrorCodes
@ -17,7 +17,6 @@ namespace ErrorCodes
extern const int NO_ROW_DELIMITER;
}
ProtobufRowOutputFormat::ProtobufRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
@ -30,8 +29,9 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat(
, serializer(ProtobufSerializer::create(
header_.getNames(),
header_.getDataTypes(),
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No),
with_length_delimiter_,
/* with_envelope = */ false,
*writer))
, allow_multiple_rows(with_length_delimiter_ || settings_.protobuf.allow_multiple_rows_without_delimiter)
{
@ -44,13 +44,12 @@ void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
"The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.",
ErrorCodes::NO_ROW_DELIMITER);
if (!row_num)
if (row_num == 0)
serializer->setColumns(columns.data(), columns.size());
serializer->writeRow(row_num);
}
void registerOutputFormatProtobuf(FormatFactory & factory)
{
for (bool with_length_delimiter : {false, true})

View File

@ -3,17 +3,15 @@
#include "config_formats.h"
#if USE_PROTOBUF
# include <Core/Block.h>
# include <Formats/FormatSchemaInfo.h>
# include <Formats/FormatSettings.h>
# include <Processors/Formats/IRowOutputFormat.h>
namespace DB
{
class ProtobufWriter;
class ProtobufSerializer;
class DB;
class FormatSchemaInfo;
class ProtobufSerializer;
class ProtobufWriter;
class WriteBuffer;
struct FormatSettings;
/** Stream designed to serialize data in the google protobuf format.

View File

@ -39,6 +39,9 @@
namespace DB
{
/// Number of streams is not the number of parts, but the number of parts * files, hence 1000.
const size_t DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE = 1000;
class AlterCommands;
class MergeTreePartsMover;
class MergeTreeDataMergerMutator;

View File

@ -52,7 +52,14 @@ void MergeTreeSink::consume(Chunk chunk)
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
std::vector<MergeTreeSink::DelayedChunk::Partition> partitions;
using DelayedPartitions = std::vector<MergeTreeSink::DelayedChunk::Partition>;
DelayedPartitions partitions;
const Settings & settings = context->getSettingsRef();
size_t streams = 0;
bool support_parallel_write = false;
for (auto & current_block : part_blocks)
{
Stopwatch watch;
@ -67,9 +74,12 @@ void MergeTreeSink::consume(Chunk chunk)
if (!temp_part.part)
continue;
if (!support_parallel_write && temp_part.part->volume->getDisk()->supportParallelWrite())
support_parallel_write = true;
if (storage.getDeduplicationLog())
{
const String & dedup_token = context->getSettingsRef().insert_deduplication_token;
const String & dedup_token = settings.insert_deduplication_token;
if (!dedup_token.empty())
{
/// multiple blocks can be inserted within the same insert query
@ -79,6 +89,24 @@ void MergeTreeSink::consume(Chunk chunk)
}
}
size_t max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE;
if (!support_parallel_write || settings.max_insert_delayed_streams_for_parallel_write.changed)
max_insert_delayed_streams_for_parallel_write = settings.max_insert_delayed_streams_for_parallel_write;
/// In case of too many columns/parts in the block, flush explicitly.
streams += temp_part.streams.size();
if (streams > max_insert_delayed_streams_for_parallel_write)
{
finishDelayedChunk();
delayed_chunk = std::make_unique<MergeTreeSink::DelayedChunk>();
delayed_chunk->partitions = std::move(partitions);
finishDelayedChunk();
streams = 0;
support_parallel_write = false;
partitions = DelayedPartitions{};
}
partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition
{
.temp_part = std::move(temp_part),

View File

@ -150,9 +150,14 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
if (quorum)
checkQuorumPrecondition(zookeeper);
const Settings & settings = context->getSettingsRef();
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
std::vector<ReplicatedMergeTreeSink::DelayedChunk::Partition> partitions;
String block_dedup_token;
using DelayedPartitions = std::vector<ReplicatedMergeTreeSink::DelayedChunk::Partition>;
DelayedPartitions partitions;
size_t streams = 0;
bool support_parallel_write = false;
for (auto & current_block : part_blocks)
{
@ -171,10 +176,12 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
if (deduplicate)
{
String block_dedup_token;
/// We add the hash of the data and the partition identifier to the deduplication ID.
/// That is, do not insert the same data to the same partition twice.
const String & dedup_token = context->getSettingsRef().insert_deduplication_token;
const String & dedup_token = settings.insert_deduplication_token;
if (!dedup_token.empty())
{
/// multiple blocks can be inserted within the same insert query
@ -182,6 +189,7 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum);
++chunk_dedup_seqnum;
}
block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token);
LOG_DEBUG(log, "Wrote block with ID '{}', {} rows", block_id, current_block.block.rows());
}
@ -192,6 +200,24 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
UInt64 elapsed_ns = watch.elapsed();
size_t max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE;
if (!support_parallel_write || settings.max_insert_delayed_streams_for_parallel_write.changed)
max_insert_delayed_streams_for_parallel_write = settings.max_insert_delayed_streams_for_parallel_write;
/// In case of too many columns/parts in the block, flush explicitly.
streams += temp_part.streams.size();
if (streams > max_insert_delayed_streams_for_parallel_write)
{
finishDelayedChunk(zookeeper);
delayed_chunk = std::make_unique<ReplicatedMergeTreeSink::DelayedChunk>();
delayed_chunk->partitions = std::move(partitions);
finishDelayedChunk(zookeeper);
streams = 0;
support_parallel_write = false;
partitions = DelayedPartitions{};
}
partitions.emplace_back(ReplicatedMergeTreeSink::DelayedChunk::Partition{
.temp_part = std::move(temp_part),
.elapsed_ns = elapsed_ns,
@ -207,7 +233,7 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
/// value for `last_block_is_duplicate`, which is possible only after the part is committed.
/// Otherwise we can delay the commit.
/// TODO: we can also delay the commit if there are no MVs.
if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
if (!settings.deduplicate_blocks_in_dependent_materialized_views)
finishDelayedChunk(zookeeper);
}
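
Note on the two sink changes above: both MergeTreeSink and ReplicatedMergeTreeSink now count the streams of the parts they delay and flush the accumulated delayed partitions once that count exceeds a threshold, so inserts into tables with very many columns (and therefore many files per part) do not keep an unbounded amount of data in memory. The threshold is DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE (1000) when the destination disk supports parallel writes; otherwise, or when the user overrides it, the value comes from the max_insert_delayed_streams_for_parallel_write setting. Since it is an ordinary query-level setting it should be tunable per INSERT; a minimal illustration (the table name and schema are made up for the example):

    # Illustrative only: cap the number of delayed streams for one INSERT.
    clickhouse-client --max_insert_delayed_streams_for_parallel_write=100 --query "
        INSERT INTO default.wide_table
        SELECT * FROM generateRandom('id UInt64, payload String')
        LIMIT 1000000"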

View File

@ -90,6 +90,8 @@ StorageRabbitMQ::StorageRabbitMQ(
, is_attach(is_attach_)
{
auto parsed_address = parseAddress(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_host_port), 5672);
context_->getRemoteHostFilter().checkHostAndPort(parsed_address.first, toString(parsed_address.second));
auto rabbitmq_username = rabbitmq_settings->rabbitmq_username.value;
auto rabbitmq_password = rabbitmq_settings->rabbitmq_password.value;
configuration =

View File

@ -156,6 +156,8 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C
}
context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port));
return configuration;
}

View File

@ -26,7 +26,7 @@ namespace ErrorCodes
extern const int TABLE_IS_DROPPED;
}
bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const
bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot)
{
bool has_state_column = false;
Names real_column_names;

View File

@ -70,7 +70,7 @@ public:
bool isSystemStorage() const override { return true; }
private:
bool hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const;
static bool hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot);
protected:
const FormatSettings format_settings;

View File

@ -4,7 +4,7 @@ import argparse
import logging
import os
import re
from typing import Tuple
from typing import List, Tuple
from artifactory import ArtifactorySaaSPath # type: ignore
from build_download_helper import dowload_build_with_progress
@ -283,27 +283,28 @@ def parse_args() -> argparse.Namespace:
return args
def process_deb(s3: S3, art_client: Artifactory):
def process_deb(s3: S3, art_clients: List[Artifactory]):
s3.download_deb()
if art_client is not None:
for art_client in art_clients:
art_client.deploy_deb(s3.packages)
def process_rpm(s3: S3, art_client: Artifactory):
def process_rpm(s3: S3, art_clients: List[Artifactory]):
s3.download_rpm()
if art_client is not None:
for art_client in art_clients:
art_client.deploy_rpm(s3.packages)
def process_tgz(s3: S3, art_client: Artifactory):
def process_tgz(s3: S3, art_clients: List[Artifactory]):
s3.download_tgz()
if art_client is not None:
for art_client in art_clients:
art_client.deploy_tgz(s3.packages)
def main():
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
args = parse_args()
os.makedirs(TEMP_PATH, exist_ok=True)
s3 = S3(
args.bucket_name,
args.pull_request,
@ -312,16 +313,18 @@ def main():
args.release.version,
args.force_download,
)
art_client = None
art_clients = []
if args.artifactory:
art_client = Artifactory(args.artifactory_url, args.release.type)
art_clients.append(Artifactory(args.artifactory_url, args.release.type))
if args.release.type == "lts":
art_clients.append(Artifactory(args.artifactory_url, "stable"))
if args.deb:
process_deb(s3, art_client)
process_deb(s3, art_clients)
if args.rpm:
process_rpm(s3, art_client)
process_rpm(s3, art_clients)
if args.tgz:
process_tgz(s3, art_client)
process_tgz(s3, art_clients)
if __name__ == "__main__":

View File

@ -100,10 +100,10 @@ class Release:
if self.release_type in self.BIG:
# Checkout to the commit, it will provide the correct current version
if with_prestable:
logging.info("Skipping prestable stage")
else:
with self.prestable():
logging.info("Prestable part of the releasing is done")
else:
logging.info("Skipping prestable stage")
with self.testing():
logging.info("Testing part of the releasing is done")

View File

@ -16,51 +16,77 @@ from build_download_helper import download_all_deb_packages
from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
from tee_popen import TeePopen
def get_run_command(build_path, result_folder, repo_tests_path, server_log_folder, image):
cmd = "docker run --cap-add=SYS_PTRACE -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \
f"--volume={build_path}:/package_folder " \
f"--volume={result_folder}:/test_output " \
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
f"--volume={server_log_folder}:/var/log/clickhouse-server {image}"
def get_run_command(
build_path, result_folder, repo_tests_path, server_log_folder, image
):
cmd = (
"docker run --cap-add=SYS_PTRACE "
"-e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' "
f"--volume={build_path}:/package_folder "
f"--volume={result_folder}:/test_output "
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image}"
)
return cmd
def process_results(result_folder, server_log_path, run_log_path):
test_results = []
additional_files = []
# Just upload all files from result_folder.
# If task provides processed results, then it's responsible for content of result_folder.
# If task provides processed results, then it's responsible for content
# of result_folder.
if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
test_files = [
f
for f in os.listdir(result_folder)
if os.path.isfile(os.path.join(result_folder, f))
]
additional_files = [os.path.join(result_folder, f) for f in test_files]
if os.path.exists(server_log_path):
server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))]
additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files]
server_log_files = [
f
for f in os.listdir(server_log_path)
if os.path.isfile(os.path.join(server_log_path, f))
]
additional_files = additional_files + [
os.path.join(server_log_path, f) for f in server_log_files
]
additional_files.append(run_log_path)
status_path = os.path.join(result_folder, "check_status.tsv")
if not os.path.exists(status_path):
return "failure", "check_status.tsv doesn't exists", test_results, additional_files
return (
"failure",
"check_status.tsv doesn't exists",
test_results,
additional_files,
)
logging.info("Found check_status.tsv")
with open(status_path, 'r', encoding='utf-8') as status_file:
status = list(csv.reader(status_file, delimiter='\t'))
with open(status_path, "r", encoding="utf-8") as status_file:
status = list(csv.reader(status_file, delimiter="\t"))
if len(status) != 1 or len(status[0]) != 2:
return "error", "Invalid check_status.tsv", test_results, additional_files
state, description = status[0][0], status[0][1]
results_path = os.path.join(result_folder, "test_results.tsv")
with open(results_path, 'r', encoding='utf-8') as results_file:
test_results = list(csv.reader(results_file, delimiter='\t'))
with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0:
raise Exception("Empty results")
@ -90,7 +116,7 @@ if __name__ == "__main__":
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
docker_image = get_image_with_version(reports_path, 'clickhouse/stress-test')
docker_image = get_image_with_version(reports_path, "clickhouse/stress-test")
packages_path = os.path.join(temp_path, "packages")
if not os.path.exists(packages_path):
@ -108,7 +134,9 @@ if __name__ == "__main__":
run_log_path = os.path.join(temp_path, "runlog.log")
run_command = get_run_command(packages_path, result_path, repo_tests_path, server_log_path, docker_image)
run_command = get_run_command(
packages_path, result_path, repo_tests_path, server_log_path, docker_image
)
logging.info("Going to run func tests: %s", run_command)
with TeePopen(run_command, run_log_path) as process:
@ -120,16 +148,32 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com')
state, description, test_results, additional_logs = process_results(result_path, server_log_path, run_log_path)
s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_results(
result_path, server_log_path, run_log_path
)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
check_name,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
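
For reference, the command string that get_run_command() assembles for the stress check looks roughly as follows; the host paths below are placeholders, not the paths the CI actually uses:

    docker run --cap-add=SYS_PTRACE \
        -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' \
        --volume=/tmp/stress_check/packages:/package_folder \
        --volume=/tmp/stress_check/test_output:/test_output \
        --volume=/path/to/ClickHouse/tests:/usr/share/clickhouse-test \
        --volume=/tmp/stress_check/server_log:/var/log/clickhouse-server \
        clickhouse/stress-test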

View File

@ -202,6 +202,29 @@ def get_processlist(args):
return clickhouse_execute_json(args, 'SHOW PROCESSLIST')
def get_processlist_after_test(args):
log_comment = args.testcase_basename
database = args.testcase_database
if args.replicated_database:
return clickhouse_execute_json(args, f"""
SELECT materialize((hostName(), tcpPort())) as host, *
FROM clusterAllReplicas('test_cluster_database_replicated', system.processes)
WHERE
query NOT LIKE '%system.processes%' AND
Settings['log_comment'] = '{log_comment}' AND
current_database = '{database}'
""")
else:
return clickhouse_execute_json(args, f"""
SELECT *
FROM system.processes
WHERE
query NOT LIKE '%system.processes%' AND
Settings['log_comment'] = '{log_comment}' AND
current_database = '{database}'
""")
# collect server stacktraces using gdb
def get_stacktraces_from_gdb(server_pid):
try:
@ -404,7 +427,7 @@ class TestCase:
testcase_args.testcase_start_time = datetime.now()
testcase_basename = os.path.basename(case_file)
testcase_args.testcase_client = f"{testcase_args.client} --log_comment='{testcase_basename}'"
testcase_args.testcase_client = f"{testcase_args.client} --log_comment '{testcase_basename}'"
testcase_args.testcase_basename = testcase_basename
if testcase_args.database:
@ -672,6 +695,16 @@ class TestCase:
proc.stdout is None or 'Exception' not in proc.stdout)
need_drop_database = not maybe_passed
left_queries_check = args.no_left_queries_check is False
if self.tags and 'no-left-queries-check' in self.tags:
left_queries_check = False
if left_queries_check:
processlist = get_processlist_after_test(args)
if processlist:
print(colored(f"\nFound queries left in processlist after running {args.testcase_basename} (database={database}):", args, "red", attrs=["bold"]))
print(json.dumps(processlist, indent=4))
exit_code.value = 1
if need_drop_database:
seconds_left = max(args.timeout - (datetime.now() - start_time).total_seconds(), 20)
try:
@ -1411,6 +1444,7 @@ if __name__ == '__main__':
parser.add_argument('--order', default='desc', choices=['asc', 'desc', 'random'], help='Run order')
parser.add_argument('--testname', action='store_true', default=None, dest='testname', help='Make query with test name before test run')
parser.add_argument('--hung-check', action='store_true', default=False)
parser.add_argument('--no-left-queries-check', action='store_true', default=False)
parser.add_argument('--force-color', action='store_true', default=False)
parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)')
parser.add_argument('--no-drop-if-fail', action='store_true', help='Do not drop database for test if test has failed')
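
The new left-queries check fails a test if any query started by the test is still visible in system.processes after it finishes. Based on the tag and the command-line flag added above, a test can opt out either per file or per run; a sketch of both (the test name is hypothetical):

    #!/usr/bin/env bash
    # Tags: no-left-queries-check
    # ... a test that intentionally leaves a background query running ...

    # or skip the check for a whole run:
    ./clickhouse-test --no-left-queries-check 01234_my_background_query_test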

View File

@ -468,6 +468,17 @@ def test_datetime64(started_cluster):
assert(result.strip() == '1960-01-01 20:00:00.000000')
def test_uuid(started_cluster):
cursor = started_cluster.postgres_conn.cursor()
cursor.execute("drop table if exists test")
cursor.execute("create table test (u uuid)")
cursor.execute("""CREATE EXTENSION IF NOT EXISTS "uuid-ossp";""")
cursor.execute("insert into test select uuid_generate_v1();")
result = node1.query("select toTypeName(u) from postgresql(postgres1, table='test')")
assert(result.strip() == 'Nullable(UUID)')
if __name__ == '__main__':
cluster.start()
input("Cluster created, press any key to destroy...")

View File

@ -0,0 +1,50 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
</preconditions>
<query>
SELECT *
FROM
(
SELECT
RegionID,
toDateTime(EventDate) AS date,
count() AS quantity
FROM test.hits
GROUP BY
RegionID,
date
) AS regions
ASOF LEFT JOIN
(
SELECT
RegionID,
EventTime
FROM test.hits
) AS ids ON (regions.RegionID = ids.RegionID) AND (regions.date &lt; ids.EventTime)
</query>
<query>
SELECT * FROM
(
SELECT
toStartOfDay(now()) + INTERVAL (seconds_since_start_of_day % 86000) SECOND AS date,
fingerprint % 50 as fingerprint,
multiIf(browserId % 3 == 0, 'firefox', browserId % 3 == 1, 'edge', 'chrome') as browser
FROM generateRandom('seconds_since_start_of_day UInt32, fingerprint UInt8, browserId UInt8') LIMIT 100000
) AS origins
ASOF LEFT JOIN
(
SELECT
toStartOfDay(now()) + INTERVAL (seconds_since_start_of_day % 86000) SECOND AS date,
fingerprint % 50 as fingerprint,
multiIf(language % 2 == 0, 'es', 'en') as lang
FROM generateRandom('seconds_since_start_of_day UInt32, fingerprint UInt8, language UInt8') LIMIT 5000000
) AS visits
ON (visits.fingerprint = origins.fingerprint AND visits.date &gt;= origins.date)
FORMAT Null
</query>
</test>
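
For readers unfamiliar with the join flavour exercised by this new performance test: ASOF LEFT JOIN keeps every left-side row and attaches, per equality key, the single right-side row whose value is closest while still satisfying the inequality. A minimal illustration outside the test (the values are made up):

    clickhouse-client --query "
        SELECT a.k, a.t, b.t AS matched_t
        FROM (SELECT 1 AS k, toDateTime('2022-03-17 12:00:00') AS t) AS a
        ASOF LEFT JOIN
        (
            SELECT 1 AS k, toDateTime('2022-03-17 12:05:00') AS t
            UNION ALL
            SELECT 1 AS k, toDateTime('2022-03-17 13:00:00') AS t
        ) AS b ON a.k = b.k AND a.t < b.t"
    # expected match: 2022-03-17 12:05:00, the closest b.t that is still greater than a.t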

View File

@ -1,4 +1,4 @@
-- Tags: shard
-- Tags: shard, no-fasttest
SET send_logs_level = 'fatal';
SELECT count() FROM remote('{127,1}.0.0.{2,3}', system.one) SETTINGS skip_unavailable_shards = 1;

View File

@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -14,7 +15,7 @@ $CLICKHOUSE_CLIENT -q "select name from system.table_functions format TSV;" > "$
# if you want long run use: env SQL_FUZZY_RUNS=100000 clickhouse-test sql_fuzzy
for SQL_FUZZY_RUN in $(seq "${SQL_FUZZY_RUNS:=5}"); do
env SQL_FUZZY_RUN="$SQL_FUZZY_RUN" perl "$CURDIR"/00746_sql_fuzzy.pl | timeout 60 $CLICKHOUSE_CLIENT --format Null --max_execution_time 10 -n --ignore-error >/dev/null 2>&1
env SQL_FUZZY_RUN="$SQL_FUZZY_RUN" perl "$CURDIR"/00746_sql_fuzzy.pl | clickhouse_client_timeout 60 $CLICKHOUSE_CLIENT --format Null --max_execution_time 10 -n --ignore-error >/dev/null 2>&1
if [[ $($CLICKHOUSE_CLIENT -q "SELECT 'Still alive'") != 'Still alive' ]]; then
break
fi
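
clickhouse_client_timeout is a helper from tests/queries/shell_config.sh, which is not part of this diff. The assumption behind this sketch is that it wraps coreutils timeout with a signal that clickhouse-client handles gracefully, so the query is cancelled on the server instead of the client being killed mid-protocol:

    # Hypothetical sketch of the helper; the real definition lives in shell_config.sh.
    function clickhouse_client_timeout()
    {
        local timeout_duration=$1 && shift
        # send SIGINT (assumption) so clickhouse-client can cancel the query and exit cleanly
        timeout -s INT "$timeout_duration" "$@"
    }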

View File

@ -7,64 +7,51 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
echo "DROP TABLE IF EXISTS concurrent_alter_column" | ${CLICKHOUSE_CLIENT}
echo "CREATE TABLE concurrent_alter_column (ts DATETIME) ENGINE = MergeTree PARTITION BY toStartOfDay(ts) ORDER BY tuple()" | ${CLICKHOUSE_CLIENT}
$CLICKHOUSE_CLIENT -nm -q "
DROP TABLE IF EXISTS concurrent_alter_column;
CREATE TABLE concurrent_alter_column (ts DATETIME) ENGINE = MergeTree PARTITION BY toStartOfDay(ts) ORDER BY tuple();
"
function thread1()
{
while true; do
for i in {1..500}; do echo "ALTER TABLE concurrent_alter_column ADD COLUMN c$i DOUBLE;"; done | ${CLICKHOUSE_CLIENT} -n --query_id=alter_00816_1
done
for i in {1..500}; do
echo "ALTER TABLE concurrent_alter_column ADD COLUMN c$i DOUBLE;"
done | ${CLICKHOUSE_CLIENT} -n
}
function thread2()
{
while true; do
echo "ALTER TABLE concurrent_alter_column ADD COLUMN d DOUBLE" | ${CLICKHOUSE_CLIENT} --query_id=alter_00816_2;
sleep "$(echo 0.0$RANDOM)";
echo "ALTER TABLE concurrent_alter_column DROP COLUMN d" | ${CLICKHOUSE_CLIENT} --query_id=alter_00816_2;
done
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_column ADD COLUMN d DOUBLE"
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_column DROP COLUMN d"
}
function thread3()
{
while true; do
echo "ALTER TABLE concurrent_alter_column ADD COLUMN e DOUBLE" | ${CLICKHOUSE_CLIENT} --query_id=alter_00816_3;
sleep "$(echo 0.0$RANDOM)";
echo "ALTER TABLE concurrent_alter_column DROP COLUMN e" | ${CLICKHOUSE_CLIENT} --query_id=alter_00816_3;
done
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_column ADD COLUMN e DOUBLE"
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_column DROP COLUMN e"
}
function thread4()
{
while true; do
echo "ALTER TABLE concurrent_alter_column ADD COLUMN f DOUBLE" | ${CLICKHOUSE_CLIENT} --query_id=alter_00816_4;
sleep "$(echo 0.0$RANDOM)";
echo "ALTER TABLE concurrent_alter_column DROP COLUMN f" | ${CLICKHOUSE_CLIENT} --query_id=alter_00816_4;
done
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_column ADD COLUMN f DOUBLE"
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_column DROP COLUMN f"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
TIMEOUT=30
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
wait
echo "DROP TABLE concurrent_alter_column NO DELAY" | ${CLICKHOUSE_CLIENT} # NO DELAY has effect only for Atomic database
# Wait for alters and check for deadlocks (in case of deadlock this loop will not finish)
while true; do
echo "SELECT * FROM system.processes WHERE query_id LIKE 'alter\\_00816\\_%'" | ${CLICKHOUSE_CLIENT} | grep -q -F 'alter' || break
sleep 1;
done
$CLICKHOUSE_CLIENT -q "DROP TABLE concurrent_alter_column NO DELAY"
echo 'did not crash'
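
clickhouse_client_loop_timeout is the second helper from tests/queries/shell_config.sh used throughout the converted tests below: the per-test "while true" loops are removed from the thread functions, and the helper presumably re-runs the exported function until the deadline, so a client invocation is never killed in the middle of a query (which is also what the new left-queries check guards against). A minimal sketch under that assumption:

    # Hypothetical sketch; the real helper is defined in shell_config.sh, outside this diff.
    function clickhouse_client_loop_timeout()
    {
        local timeout_duration=$1 && shift
        local deadline=$((SECONDS + timeout_duration))
        while (( SECONDS < deadline )); do
            "$@" || break   # each iteration runs the exported thread function once
        done
    }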

View File

@ -1,6 +1,10 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# End-to-end test of serialization/deserialization of a table with different
# data types to/from Protobuf format.
# Cf. 02240_protobuflist_format_persons.sh
# To generate the reference file for this test, use the following commands:
# ninja ProtobufDelimitedMessagesSerializer
# build/utils/test-data-generator/ProtobufDelimitedMessagesSerializer

View File

@ -339,6 +339,9 @@ Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unkno
(NULL)
=== Try load data from single_nan.parquet
\N
=== Try load data from test_setting_input_format_use_lowercase_column_name.parquet
123 1
456 2
=== Try load data from userdata1.parquet
1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02
1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV

View File

@ -40,11 +40,13 @@ DATA_DIR=$CUR_DIR/data_parquet
# Code: 349. DB::Ex---tion: Can not insert NULL data into non-nullable column "phoneNumbers": data for INSERT was parsed from stdin
for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | LC_ALL=C sort); do
echo === Try load data from "$NAME"
JSON=$DATA_DIR/$NAME.json
COLUMNS_FILE=$DATA_DIR/$NAME.columns
([ -z "$PARQUET_READER" ] || [ ! -s "$PARQUET_READER" ]) && [ ! -s "$COLUMNS_FILE" ] && continue
echo === Try load data from "$NAME"
# If you want change or add .parquet file - rm data_parquet/*.json data_parquet/*.columns
[ -n "$PARQUET_READER" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$PARQUET_READER" --json "$DATA_DIR"/"$NAME" > "$JSON"
[ ! -s "$COLUMNS_FILE" ] && "$CUR_DIR"/helpers/00900_parquet_create_table_columns.py "$JSON" > "$COLUMNS_FILE"

View File

@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
DATA_FILE=$CUR_DIR/data_orc/test.orc
DATA_FILE=$CUR_DIR/data_orc/test_$CLICKHOUSE_TEST_UNIQUE_NAME.orc
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (int Int32, smallint Int8, bigint Int64, float Float32, double Float64, date Date, y String, datetime64 DateTime64(3)) ENGINE = Memory"
@ -14,7 +14,7 @@ ${CLICKHOUSE_CLIENT} --query="select * from orc_load FORMAT ORC" > $DATA_FILE
${CLICKHOUSE_CLIENT} --query="truncate table orc_load"
cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "insert into orc_load format ORC"
timeout 3 ${CLICKHOUSE_CLIENT} -q "insert into orc_load format ORC" < $DATA_FILE
clickhouse_client_timeout 3 ${CLICKHOUSE_CLIENT} -q "insert into orc_load format ORC" < $DATA_FILE
${CLICKHOUSE_CLIENT} --query="select * from orc_load"
${CLICKHOUSE_CLIENT} --query="drop table orc_load"
rm -rf "$DATA_FILE"

View File

@ -35,7 +35,7 @@ $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_to_kill' ASYNC" &>/dev/nul
sleep 1
# Kill $query_for_pending SYNC. This query is not blocker, so it should be killed fast.
timeout 20 ${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE query='$query_for_pending' SYNC" &>/dev/null
clickhouse_client_timeout 20 ${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE query='$query_for_pending' SYNC" &>/dev/null
# Both queries have to be killed, doesn't matter with SYNC or ASYNC kill
for _ in {1..15}

View File

@ -9,40 +9,45 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table"
$CLICKHOUSE_CLIENT -q "CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = MergeTree ORDER BY a"
$CLICKHOUSE_CLIENT -nm -q "
DROP TABLE IF EXISTS alter_table;
CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = MergeTree ORDER BY a;
"
function thread1()
{
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
while true; do $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; done
$CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"
}
function thread2()
{
while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done
$CLICKHOUSE_CLIENT -n --query "
ALTER TABLE alter_table ADD COLUMN h String;
ALTER TABLE alter_table MODIFY COLUMN h UInt64;
ALTER TABLE alter_table DROP COLUMN h;
"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread1
export -f thread2
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread1 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
timeout 15 bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread1 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
clickhouse_client_loop_timeout 15 thread2 2> /dev/null &
wait

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: race
# Tags: race, long
# This test is disabled because it triggers an internal assert in Thread Sanitizer.
# Thread Sanitizer does not support more than 64 mutexes being locked in a single thread.
@ -11,67 +11,68 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table"
$CLICKHOUSE_CLIENT -q "CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = MergeTree ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1"
$CLICKHOUSE_CLIENT -nm -q "
DROP TABLE IF EXISTS alter_table;
CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = MergeTree ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1;
"
function thread1()
{
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
while true; do $CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"; done
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"
}
function thread2()
{
while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String '0'; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String '0'; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"
}
function thread3()
{
while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done
$CLICKHOUSE_CLIENT -q "INSERT INTO alter_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"
}
function thread4()
{
while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table FINAL"; done
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table FINAL"
}
function thread5()
{
while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table DELETE WHERE rand() % 2 = 1"; done
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table DELETE WHERE rand() % 2 = 1"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread5;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
export -f thread5
TIMEOUT=30
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
wait

View File

@ -9,76 +9,95 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
$CLICKHOUSE_CLIENT -n -q "
$CLICKHOUSE_CLIENT -mn -q "
DROP TABLE IF EXISTS alter_table0;
DROP TABLE IF EXISTS alter_table1;
CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0;
CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0
CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16))
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1')
ORDER BY a
PARTITION BY b % 10
SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0;
CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16))
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2')
ORDER BY a
PARTITION BY b % 10
SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0;
"
function thread1()
{
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
while true; do $CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"; done
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"
}
function thread2()
{
while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table0 ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table0 MODIFY COLUMN h UInt64; ALTER TABLE alter_table0 DROP COLUMN h;"; done
$CLICKHOUSE_CLIENT -nm --query "
ALTER TABLE alter_table0 ADD COLUMN h String DEFAULT '0';
ALTER TABLE alter_table0 MODIFY COLUMN h UInt64;
ALTER TABLE alter_table0 DROP COLUMN h;
"
}
function thread3()
{
while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table0 SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done
$CLICKHOUSE_CLIENT -q "
INSERT INTO alter_table0
SELECT
rand(1), rand(2), 1 / rand(3), toString(rand(4)),
[rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(),
(rand(8), rand(9))
FROM numbers(100000)"
}
function thread4()
{
while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"
}
function thread5()
{
while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread5;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
export -f thread5
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
wait
check_replication_consistency "alter_table" "count(), sum(a), sum(b), round(sum(c))"
$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table0;" 2> >(grep -F -v 'is already started to be removing by another replica right now') &
$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table1;" 2> >(grep -F -v 'is already started to be removing by another replica right now') &
wait

View File

@ -12,93 +12,87 @@ set -e
function thread1()
{
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
while true; do
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null";
done
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"
}
function thread2()
{
while true; do
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table_$REPLICA ADD COLUMN h String '0'; ALTER TABLE alter_table_$REPLICA MODIFY COLUMN h UInt64; ALTER TABLE alter_table_$REPLICA DROP COLUMN h;";
done
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table_$REPLICA ADD COLUMN h String '0'; ALTER TABLE alter_table_$REPLICA MODIFY COLUMN h UInt64; ALTER TABLE alter_table_$REPLICA DROP COLUMN h;"
}
function thread3()
{
while true; do
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "INSERT INTO alter_table_$REPLICA SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)";
done
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "INSERT INTO alter_table_$REPLICA SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"
}
function thread4()
{
while true; do
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table_$REPLICA FINAL";
sleep 0.$RANDOM;
done
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table_$REPLICA FINAL"
sleep 0.$RANDOM
}
function thread5()
{
while true; do
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table_$REPLICA DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288";
sleep 0.$RANDOM;
done
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table_$REPLICA DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"
sleep 0.$RANDOM
}
function thread6()
{
while true; do
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS alter_table_$REPLICA;
CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;";
sleep 0.$RANDOM;
done
REPLICA=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -mn -q "
DROP TABLE IF EXISTS alter_table_$REPLICA;
CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16))
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA')
ORDER BY a
PARTITION BY b % 10
SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;
"
sleep 0.$RANDOM
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread5;
export -f thread6;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
export -f thread5
export -f thread6
TIMEOUT=30
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
wait

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: race
# Tags: race, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -7,35 +7,32 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test1";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test2";
$CLICKHOUSE_CLIENT --query "CREATE TABLE test1 (x UInt64) ENGINE = Memory";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test1"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test2"
$CLICKHOUSE_CLIENT --query "CREATE TABLE test1 (x UInt64) ENGINE = Memory"
function thread1()
{
while true; do
seq 1 1000 | sed -r -e 's/.+/RENAME TABLE test1 TO test2; RENAME TABLE test2 TO test1;/' | $CLICKHOUSE_CLIENT -n
done
seq 1 1000 | {
sed -r -e 's/.+/RENAME TABLE test1 TO test2; RENAME TABLE test2 TO test1;/'
} | $CLICKHOUSE_CLIENT -n
}
function thread2()
{
while true; do
$CLICKHOUSE_CLIENT --query "SELECT * FROM merge('$CLICKHOUSE_DATABASE', '^test[12]$')"
done
$CLICKHOUSE_CLIENT --query "SELECT * FROM merge('$CLICKHOUSE_DATABASE', '^test[12]$')"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread1
export -f thread2
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
wait
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test1";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test2";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test1"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test2"

View File

@ -12,48 +12,39 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE test (x UInt8, s String MATERIALIZED to
function thread1()
{
while true; do
$CLICKHOUSE_CLIENT --query "INSERT INTO test SELECT rand() FROM numbers(1000)";
done
$CLICKHOUSE_CLIENT --query "INSERT INTO test SELECT rand() FROM numbers(1000)"
}
function thread2()
{
while true; do
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE test MODIFY COLUMN x Nullable(UInt8);";
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE test MODIFY COLUMN x UInt8;";
sleep 0.0$RANDOM
done
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE test MODIFY COLUMN x Nullable(UInt8);"
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT -n --query "ALTER TABLE test MODIFY COLUMN x UInt8;"
sleep 0.0$RANDOM
}
function thread3()
{
while true; do
$CLICKHOUSE_CLIENT -n --query "SELECT count() FROM test FORMAT Null";
done
$CLICKHOUSE_CLIENT -n --query "SELECT count() FROM test FORMAT Null"
}
function thread4()
{
while true; do
$CLICKHOUSE_CLIENT -n --query "OPTIMIZE TABLE test FINAL";
sleep 0.1$RANDOM
done
$CLICKHOUSE_CLIENT -n --query "OPTIMIZE TABLE test FINAL"
sleep 0.1$RANDOM
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
wait

View File

@ -9,44 +9,37 @@ set -e
function thread1()
{
while true; do
$CLICKHOUSE_CLIENT --query_id=hello --query "SELECT count() FROM numbers(1000000000)" --format Null;
done
$CLICKHOUSE_CLIENT --query_id=hello --query "SELECT count() FROM numbers(1000000000)" --format Null;
}
function thread2()
{
while true; do
$CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id = 'hello'" --format Null;
sleep 0.$RANDOM
done
$CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id = 'hello'" --format Null
sleep 0.$RANDOM
}
function thread3()
{
while true; do
$CLICKHOUSE_CLIENT --query "SHOW PROCESSLIST" --format Null;
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.processes" --format Null;
done
$CLICKHOUSE_CLIENT --query "SHOW PROCESSLIST" --format Null
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.processes" --format Null
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread1
export -f thread2
export -f thread3
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
wait

View File

@ -14,48 +14,41 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE test2 (x UInt8) ENGINE = MergeTree ORDE
function thread1()
{
while true; do
$CLICKHOUSE_CLIENT --query "RENAME TABLE test1 TO test_tmp, test2 TO test1, test_tmp TO test2"
done
$CLICKHOUSE_CLIENT --query "RENAME TABLE test1 TO test_tmp, test2 TO test1, test_tmp TO test2"
}
function thread2()
{
while true; do
$CLICKHOUSE_CLIENT --query "SELECT * FROM test1 UNION ALL SELECT * FROM test2" --format Null
done
$CLICKHOUSE_CLIENT --query "SELECT * FROM test1 UNION ALL SELECT * FROM test2" --format Null
}
function thread3()
{
while true; do
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.tables" --format Null
done
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.tables" --format Null
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread1
export -f thread2
export -f thread3
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
wait
sleep 1

View File

@ -12,36 +12,31 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE test1 (x UInt8) ENGINE = MergeTree ORDE
function thread1()
{
while true; do
$CLICKHOUSE_CLIENT --query "ALTER TABLE test1 MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE test1 MODIFY COLUMN x UInt8"
done
$CLICKHOUSE_CLIENT --query "ALTER TABLE test1 MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE test1 MODIFY COLUMN x UInt8"
}
function thread2()
{
while true; do
$CLICKHOUSE_CLIENT --query "SELECT x FROM test1 WHERE x IN (SELECT x FROM remote('127.0.0.2', '$CLICKHOUSE_DATABASE', test1))" --format Null
done
$CLICKHOUSE_CLIENT --query "SELECT x FROM test1 WHERE x IN (SELECT x FROM remote('127.0.0.2', '$CLICKHOUSE_DATABASE', test1))" --format Null
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread1
export -f thread2
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
wait

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: deadlock
# Tags: deadlock, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -16,48 +16,39 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE b (x UInt8) ENGINE = MergeTree ORDER BY
function thread1()
{
while true; do
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
seq 1 100 | awk '{ print "SELECT x FROM a WHERE x IN (SELECT toUInt8(count()) FROM system.tables);" }' | $CLICKHOUSE_CLIENT -n
done
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
seq 1 100 | awk '{ print "SELECT x FROM a WHERE x IN (SELECT toUInt8(count()) FROM system.tables);" }' | $CLICKHOUSE_CLIENT -n
}
function thread2()
{
while true; do
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
seq 1 100 | awk '{ print "SELECT x FROM b WHERE x IN (SELECT toUInt8(count()) FROM system.tables);" }' | $CLICKHOUSE_CLIENT -n
done
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
seq 1 100 | awk '{ print "SELECT x FROM b WHERE x IN (SELECT toUInt8(count()) FROM system.tables);" }' | $CLICKHOUSE_CLIENT -n
}
function thread3()
{
while true; do
$CLICKHOUSE_CLIENT --query "ALTER TABLE a MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE a MODIFY COLUMN x UInt8"
done
$CLICKHOUSE_CLIENT --query "ALTER TABLE a MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE a MODIFY COLUMN x UInt8"
}
function thread4()
{
while true; do
$CLICKHOUSE_CLIENT --query "ALTER TABLE b MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE b MODIFY COLUMN x UInt8"
done
$CLICKHOUSE_CLIENT --query "ALTER TABLE b MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE b MODIFY COLUMN x UInt8"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
wait

View File

@ -19,7 +19,7 @@ ${CLICKHOUSE_CLIENT} -n -q "
INSERT INTO $R1 VALUES (1)
"
timeout 10s ${CLICKHOUSE_CLIENT} -n -q "
clickhouse_client_timeout 10s ${CLICKHOUSE_CLIENT} --receive_timeout 1 -n -q "
SET receive_timeout=1;
SYSTEM SYNC REPLICA $R2
" 2>&1 | grep -F -q "Code: 159. DB::Exception" && echo 'OK' || echo 'Failed!'

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-parallel
# Tags: no-parallel, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -7,95 +7,67 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
export CURR_DATABASE="test_lazy_01014_concurrent_${CLICKHOUSE_DATABASE}"
function recreate_lazy_func1()
{
$CLICKHOUSE_CLIENT -q "
CREATE TABLE $CURR_DATABASE.log (a UInt64, b UInt64) ENGINE = Log;
";
while true; do
$CLICKHOUSE_CLIENT -q "
DETACH TABLE $CURR_DATABASE.log;
";
$CLICKHOUSE_CLIENT -q "
ATTACH TABLE $CURR_DATABASE.log;
";
done
$CLICKHOUSE_CLIENT -nm -q "
DETACH TABLE $CURR_DATABASE.log;
ATTACH TABLE $CURR_DATABASE.log;
"
}
function recreate_lazy_func2()
{
while true; do
$CLICKHOUSE_CLIENT -q "
CREATE TABLE $CURR_DATABASE.tlog (a UInt64, b UInt64) ENGINE = TinyLog;
";
$CLICKHOUSE_CLIENT -q "
DROP TABLE $CURR_DATABASE.tlog;
";
done
$CLICKHOUSE_CLIENT -nm -q "
CREATE TABLE $CURR_DATABASE.tlog (a UInt64, b UInt64) ENGINE = TinyLog;
DROP TABLE $CURR_DATABASE.tlog;
"
}
function recreate_lazy_func3()
{
$CLICKHOUSE_CLIENT -q "
CREATE TABLE $CURR_DATABASE.slog (a UInt64, b UInt64) ENGINE = StripeLog;
";
while true; do
$CLICKHOUSE_CLIENT -q "
ATTACH TABLE $CURR_DATABASE.slog;
";
$CLICKHOUSE_CLIENT -q "
DETACH TABLE $CURR_DATABASE.slog;
";
done
$CLICKHOUSE_CLIENT -nm -q "
ATTACH TABLE $CURR_DATABASE.slog;
DETACH TABLE $CURR_DATABASE.slog;
"
}
function recreate_lazy_func4()
{
while true; do
$CLICKHOUSE_CLIENT -q "
CREATE TABLE $CURR_DATABASE.tlog2 (a UInt64, b UInt64) ENGINE = TinyLog;
";
$CLICKHOUSE_CLIENT -q "
DROP TABLE $CURR_DATABASE.tlog2;
";
done
$CLICKHOUSE_CLIENT -nm -q "
CREATE TABLE $CURR_DATABASE.tlog2 (a UInt64, b UInt64) ENGINE = TinyLog;
DROP TABLE $CURR_DATABASE.tlog2;
"
}
function show_tables_func()
{
while true; do
$CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE database = '$CURR_DATABASE' FORMAT Null";
done
$CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE database = '$CURR_DATABASE' FORMAT Null"
}
export -f recreate_lazy_func1;
export -f recreate_lazy_func2;
export -f recreate_lazy_func3;
export -f recreate_lazy_func4;
export -f show_tables_func;
export -f recreate_lazy_func1
export -f recreate_lazy_func2
export -f recreate_lazy_func3
export -f recreate_lazy_func4
export -f show_tables_func
${CLICKHOUSE_CLIENT} -n -q "
DROP DATABASE IF EXISTS $CURR_DATABASE;
CREATE DATABASE $CURR_DATABASE ENGINE = Lazy(1);
CREATE TABLE $CURR_DATABASE.log (a UInt64, b UInt64) ENGINE = Log;
CREATE TABLE $CURR_DATABASE.slog (a UInt64, b UInt64) ENGINE = StripeLog;
"
TIMEOUT=30
timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null &
timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null &
timeout $TIMEOUT bash -c recreate_lazy_func3 2> /dev/null &
timeout $TIMEOUT bash -c recreate_lazy_func4 2> /dev/null &
timeout $TIMEOUT bash -c show_tables_func 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT recreate_lazy_func1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT recreate_lazy_func2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT recreate_lazy_func3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT recreate_lazy_func4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT show_tables_func 2> /dev/null &
wait
sleep 1
@ -108,4 +80,3 @@ ${CLICKHOUSE_CLIENT} -q "ATTACH TABLE $CURR_DATABASE.tlog2;" 2>/dev/null
${CLICKHOUSE_CLIENT} -q "DROP DATABASE $CURR_DATABASE"
echo "Test OK"

View File

@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
$CLICKHOUSE_CLIENT -n -q "
$CLICKHOUSE_CLIENT -mn -q "
DROP DATABASE IF EXISTS database_for_dict;
DROP TABLE IF EXISTS table_for_dict1;
DROP TABLE IF EXISTS table_for_dict2;
@ -20,96 +20,104 @@ $CLICKHOUSE_CLIENT -n -q "
CREATE DATABASE database_for_dict;
CREATE DICTIONARY database_for_dict.dict1 (key_column UInt64, value_column String) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict1' PASSWORD '' DB '$CLICKHOUSE_DATABASE')) LIFETIME(MIN 1 MAX 5) LAYOUT(FLAT());
CREATE DICTIONARY database_for_dict.dict1 (key_column UInt64, value_column String)
PRIMARY KEY key_column
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict1' PASSWORD '' DB '$CLICKHOUSE_DATABASE'))
LIFETIME(MIN 1 MAX 5)
LAYOUT(FLAT());
CREATE DICTIONARY database_for_dict.dict2 (key_column UInt64, value_column String) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict2' PASSWORD '' DB '$CLICKHOUSE_DATABASE')) LIFETIME(MIN 1 MAX 5) LAYOUT(CACHE(SIZE_IN_CELLS 150));
CREATE DICTIONARY database_for_dict.dict2 (key_column UInt64, value_column String)
PRIMARY KEY key_column
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict2' PASSWORD '' DB '$CLICKHOUSE_DATABASE'))
LIFETIME(MIN 1 MAX 5)
LAYOUT(CACHE(SIZE_IN_CELLS 150));
"
function thread1()
{
while true; do $CLICKHOUSE_CLIENT --query "SELECT * FROM system.dictionaries FORMAT Null"; done
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.dictionaries FORMAT Null"
}
function thread2()
{
while true; do CLICKHOUSE_CLIENT --query "ATTACH DICTIONARY database_for_dict.dict1" ||: ; done
$CLICKHOUSE_CLIENT --query "ATTACH DICTIONARY database_for_dict.dict1" ||:
}
function thread3()
{
while true; do CLICKHOUSE_CLIENT --query "ATTACH DICTIONARY database_for_dict.dict2" ||:; done
$CLICKHOUSE_CLIENT --query "ATTACH DICTIONARY database_for_dict.dict2" ||:
}
function thread4()
{
while true; do $CLICKHOUSE_CLIENT -n -q "
$CLICKHOUSE_CLIENT -n -q "
SELECT * FROM database_for_dict.dict1 FORMAT Null;
SELECT * FROM database_for_dict.dict2 FORMAT Null;
" ||: ; done
" ||:
}
function thread5()
{
while true; do $CLICKHOUSE_CLIENT -n -q "
$CLICKHOUSE_CLIENT -n -q "
SELECT dictGetString('database_for_dict.dict1', 'value_column', toUInt64(number)) FROM numbers(1000) FORMAT Null;
SELECT dictGetString('database_for_dict.dict2', 'value_column', toUInt64(number)) FROM numbers(1000) FORMAT Null;
" ||: ; done
" ||:
}
function thread6()
{
while true; do $CLICKHOUSE_CLIENT -q "DETACH DICTIONARY database_for_dict.dict1"; done
$CLICKHOUSE_CLIENT -q "DETACH DICTIONARY database_for_dict.dict1"
}
function thread7()
{
while true; do $CLICKHOUSE_CLIENT -q "DETACH DICTIONARY database_for_dict.dict2"; done
$CLICKHOUSE_CLIENT -q "DETACH DICTIONARY database_for_dict.dict2"
}
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread5;
export -f thread6;
export -f thread7;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
export -f thread5
export -f thread6
export -f thread7
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null &
timeout $TIMEOUT bash -c thread7 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread7 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null &
timeout $TIMEOUT bash -c thread7 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread7 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null &
timeout $TIMEOUT bash -c thread7 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread7 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null &
timeout $TIMEOUT bash -c thread7 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread6 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread7 2> /dev/null &
wait
$CLICKHOUSE_CLIENT -q "SELECT 'Still alive'"

View File

@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: long
set -e
@ -29,19 +30,15 @@ EOF
function alter_thread()
{
trap 'exit' INT
ALTERS[0]="ALTER TABLE mv MODIFY QUERY SELECT v FROM src;"
ALTERS[1]="ALTER TABLE mv MODIFY QUERY SELECT v * 2 as v FROM src;"
while true; do
$CLICKHOUSE_CLIENT --allow_experimental_alter_materialized_view_structure=1 -q "${ALTERS[$RANDOM % 2]}"
sleep "$(echo 0.$RANDOM)";
done
$CLICKHOUSE_CLIENT --allow_experimental_alter_materialized_view_structure=1 -q "${ALTERS[$RANDOM % 2]}"
sleep 0.$RANDOM
}
export -f alter_thread;
timeout 10 bash -c alter_thread &
export -f alter_thread
clickhouse_client_loop_timeout 10 alter_thread &
for _ in {1..100}; do
# Retry errors that are hopefully retriable (deadlock avoided).

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: zookeeper
# Tags: zookeeper, no-fasttest
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: zookeeper, no-parallel
# Tags: zookeeper, no-parallel, no-fasttest
set -e
@ -15,66 +15,46 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, k String) ENGINE = Repli
function thread1()
{
while true;
do
$CLICKHOUSE_CLIENT --query="ALTER TABLE src MOVE PARTITION 1 TO TABLE dst;" --query_id=query1
done
$CLICKHOUSE_CLIENT --query="ALTER TABLE src MOVE PARTITION 1 TO TABLE dst"
}
function thread2()
{
while true;
do
$CLICKHOUSE_CLIENT --query="INSERT INTO src SELECT number % 2, toString(number) FROM system.numbers LIMIT 100000" --query_id=query2
done
$CLICKHOUSE_CLIENT --query="INSERT INTO src SELECT number % 2, toString(number) FROM system.numbers LIMIT 100000"
}
function thread3()
{
while true;
do
$CLICKHOUSE_CLIENT --query="SELECT * FROM src" --query_id=query3 1> /dev/null
done
$CLICKHOUSE_CLIENT --query="SELECT * FROM src" > /dev/null
}
function thread4()
{
while true;
do
$CLICKHOUSE_CLIENT --query="SELECT * FROM dst" --query_id=query4 1> /dev/null
done
$CLICKHOUSE_CLIENT --query="SELECT * FROM dst" > /dev/null
}
function thread5()
{
while true;
do
$CLICKHOUSE_CLIENT --query="ALTER TABLE src MOVE PARTITION 1 TO TABLE dst;" --query_id=query5
done
$CLICKHOUSE_CLIENT --query="ALTER TABLE src MOVE PARTITION 1 TO TABLE dst"
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread5;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
export -f thread5
TIMEOUT=30
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread4 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread5 2> /dev/null &
wait
echo "DROP TABLE src NO DELAY" | ${CLICKHOUSE_CLIENT}
echo "DROP TABLE dst NO DELAY" | ${CLICKHOUSE_CLIENT}
sleep 5
# Check for deadlocks
echo "SELECT * FROM system.processes WHERE query_id LIKE 'query%'" | ${CLICKHOUSE_CLIENT}
echo 'did not crash'

View File

@ -18,56 +18,44 @@ $CLICKHOUSE_CLIENT --query="insert into test_01054.ints values (3, 3, 3, 3, 3, 3
function thread1()
{
for _ in {1..100}
do
RAND_NUMBER_THREAD1=$($CLICKHOUSE_CLIENT --query="SELECT rand() % 100;")
$CLICKHOUSE_CLIENT --query="select dictGet('one_cell_cache_ints', 'i8', toUInt64($RAND_NUMBER_THREAD1));"
done
}
function thread2()
{
for _ in {1..100}
do
RAND_NUMBER_THREAD2=$($CLICKHOUSE_CLIENT --query="SELECT rand() % 100;")
$CLICKHOUSE_CLIENT --query="select dictGet('one_cell_cache_ints', 'i8', toUInt64($RAND_NUMBER_THREAD2));"
done
}
function thread3()
{
for _ in {1..100}
do
RAND_NUMBER_THREAD3=$($CLICKHOUSE_CLIENT --query="SELECT rand() % 100;")
$CLICKHOUSE_CLIENT --query="select dictGet('one_cell_cache_ints', 'i8', toUInt64($RAND_NUMBER_THREAD3));"
done
}
function thread4()
{
for _ in {1..100}
do
RAND_NUMBER_THREAD4=$($CLICKHOUSE_CLIENT --query="SELECT rand() % 100;")
$CLICKHOUSE_CLIENT --query="select dictGet('one_cell_cache_ints', 'i8', toUInt64($RAND_NUMBER_THREAD4));"
done
}
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
export -f thread1
export -f thread2
export -f thread3
export -f thread4
TIMEOUT=10
# shellcheck disable=SC2188
timeout $TIMEOUT bash -c thread1 > /dev/null 2>&1 &
timeout $TIMEOUT bash -c thread2 > /dev/null 2>&1 &
timeout $TIMEOUT bash -c thread3 > /dev/null 2>&1 &
timeout $TIMEOUT bash -c thread4 > /dev/null 2>&1 &
clickhouse_client_loop_timeout $TIMEOUT thread1 > /dev/null 2>&1 &
clickhouse_client_loop_timeout $TIMEOUT thread2 > /dev/null 2>&1 &
clickhouse_client_loop_timeout $TIMEOUT thread3 > /dev/null 2>&1 &
clickhouse_client_loop_timeout $TIMEOUT thread4 > /dev/null 2>&1 &
wait
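The four threads above all follow the same idiom: capture a scalar query result into a shell variable, then splice it into the next query as a literal. In isolation:

# Capture a scalar result, then reuse it as a literal in the following query.
RAND_KEY=$($CLICKHOUSE_CLIENT --query "SELECT rand() % 100")
$CLICKHOUSE_CLIENT --query "SELECT dictGet('one_cell_cache_ints', 'i8', toUInt64($RAND_KEY))"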

View File

@ -38,30 +38,24 @@ LAYOUT(CACHE(SIZE_IN_CELLS 10));
function thread1()
{
for _ in {1..50}
do
# This query will end with an exception, because the source dictionary has UUID as a key type.
$CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(1));"
done
# This query will end with an exception, because the source dictionary has UUID as a key type.
$CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(1));"
}
function thread2()
{
for _ in {1..50}
do
# This query will end with an exception, because the source dictionary has UUID as a key type.
$CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(2));"
done
# This query will end with an exception, because the source dictionary has UUID as a key type.
$CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(2));"
}
export -f thread1;
export -f thread2;
export -f thread1
export -f thread2
TIMEOUT=5
timeout $TIMEOUT bash -c thread1 > /dev/null 2>&1 &
timeout $TIMEOUT bash -c thread2 > /dev/null 2>&1 &
clickhouse_client_loop_timeout $TIMEOUT thread1 > /dev/null 2>&1 &
clickhouse_client_loop_timeout $TIMEOUT thread2 > /dev/null 2>&1 &
wait

View File

@ -21,7 +21,12 @@ for i in $(seq $REPLICAS); do
done
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_mutate_mt_$i (key UInt64, value1 UInt64, value2 String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_mutate_mt', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000,temporary_directories_lifetime=10,cleanup_delay_period=3,cleanup_delay_period_random_add=0"
$CLICKHOUSE_CLIENT -nm --query "
CREATE TABLE concurrent_mutate_mt_$i (key UInt64, value1 UInt64, value2 String)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_mutate_mt', '$i')
ORDER BY key
SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000,temporary_directories_lifetime=10,cleanup_delay_period=3,cleanup_delay_period_random_add=0;
"
done
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_mutate_mt_1 SELECT number, number + 10, toString(number) from numbers(10)"
@ -40,59 +45,52 @@ INITIAL_SUM=$($CLICKHOUSE_CLIENT --query "SELECT SUM(value1) FROM concurrent_mut
# Run mutation on random replica
function correct_alter_thread()
{
while true; do
REPLICA=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_mutate_mt_$REPLICA UPDATE value1 = value1 + 1 WHERE 1";
sleep 1
done
REPLICA=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_mutate_mt_$REPLICA UPDATE value1 = value1 + 1 WHERE 1"
sleep 1
}
# This thread adds some data to the table.
function insert_thread()
{
VALUES=(7 8 9)
while true; do
REPLICA=$(($RANDOM % 5 + 1))
VALUE=${VALUES[$RANDOM % ${#VALUES[@]} ]}
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_mutate_mt_$REPLICA VALUES($RANDOM, $VALUE, toString($VALUE))"
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 5 + 1))
VALUE=${VALUES[$RANDOM % ${#VALUES[@]} ]}
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_mutate_mt_$REPLICA VALUES($RANDOM, $VALUE, toString($VALUE))"
sleep 0.$RANDOM
}
function detach_attach_thread()
{
while true; do
REPLICA=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "DETACH TABLE concurrent_mutate_mt_$REPLICA"
sleep 0.$RANDOM
sleep 0.$RANDOM
sleep 0.$RANDOM
$CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_mutate_mt_$REPLICA"
done
REPLICA=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "DETACH TABLE concurrent_mutate_mt_$REPLICA"
sleep 0.$RANDOM
sleep 0.$RANDOM
sleep 0.$RANDOM
$CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_mutate_mt_$REPLICA"
}
echo "Starting alters"
export -f correct_alter_thread;
export -f insert_thread;
export -f detach_attach_thread;
export -f correct_alter_thread
export -f insert_thread
export -f detach_attach_thread
# We assign a lot of mutations, so the timeout shouldn't be too big
TIMEOUT=15
timeout $TIMEOUT bash -c detach_attach_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT detach_attach_thread 2> /dev/null &
timeout $TIMEOUT bash -c correct_alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT correct_alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
wait
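The sleep 0.$RANDOM calls scattered through these threads add sub-second jitter so the concurrent operations do not line up. The trick in isolation:

# $RANDOM is a bash builtin in 0..32767, so 0.$RANDOM is a fractional number of
# seconds below 1 (e.g. 0.20413); GNU sleep accepts fractional durations.
sleep 0.$RANDOM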

View File

@ -15,7 +15,12 @@ done
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_add_drop_$i (key UInt64, value0 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_column', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000"
$CLICKHOUSE_CLIENT -nm --query "
CREATE TABLE concurrent_alter_add_drop_$i (key UInt64, value0 UInt8)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_column', '$i')
ORDER BY key
SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000;
"
done
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_1 SELECT number, number + 10 from numbers(100000)"
@ -27,58 +32,54 @@ done
function alter_thread()
{
while true; do
REPLICA=$(($RANDOM % 3 + 1))
ADD=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_$REPLICA ADD COLUMN value$ADD UInt32 DEFAULT 42 SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency
DROP=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_$REPLICA DROP COLUMN value$DROP SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 3 + 1))
ADD=$(($RANDOM % 5 + 1))
# additionally we don't wait for anything, for heavier concurrency
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_$REPLICA ADD COLUMN value$ADD UInt32 DEFAULT 42 SETTINGS replication_alter_partitions_sync=0"
DROP=$(($RANDOM % 5 + 1))
# additionally we don't wait for anything, for heavier concurrency
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_$REPLICA DROP COLUMN value$DROP SETTINGS replication_alter_partitions_sync=0"
sleep 0.$RANDOM
}
function optimize_thread()
{
while true; do
REPLICA=$(($RANDOM % 3 + 1))
$CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE concurrent_alter_add_drop_$REPLICA FINAL SETTINGS replication_alter_partitions_sync=0";
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 3 + 1))
$CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE concurrent_alter_add_drop_$REPLICA FINAL SETTINGS replication_alter_partitions_sync=0"
sleep 0.$RANDOM
}
function insert_thread()
{
while true; do
REPLICA=$(($RANDOM % 3 + 1))
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_$REPLICA VALUES($RANDOM, 7)"
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 3 + 1))
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_$REPLICA VALUES($RANDOM, 7)"
sleep 0.$RANDOM
}
echo "Starting alters"
export -f alter_thread;
export -f optimize_thread;
export -f insert_thread;
export -f alter_thread
export -f optimize_thread
export -f insert_thread
TIMEOUT=30
# Sometimes we detach and attach tables
timeout $TIMEOUT bash -c alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c optimize_thread 2> /dev/null &
timeout $TIMEOUT bash -c optimize_thread 2> /dev/null &
timeout $TIMEOUT bash -c optimize_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT optimize_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT optimize_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT optimize_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
wait
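All of the ALTER and OPTIMIZE statements above pass replication_alter_partitions_sync=0, so the client returns as soon as the entry is queued instead of waiting for replicas, which is what makes the concurrency heavy. The fire-and-forget form on its own:

# Fire-and-forget ALTER: with replication_alter_partitions_sync=0 the client does
# not wait for replicas to apply the change.
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_1 ADD COLUMN value1 UInt32 DEFAULT 42 SETTINGS replication_alter_partitions_sync=0"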

View File

@ -12,11 +12,24 @@ for i in $(seq $REPLICAS); do
done
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_detach_$i (key UInt64, value1 UInt8, value2 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_detach', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000,temporary_directories_lifetime=10,cleanup_delay_period=3,cleanup_delay_period_random_add=0"
$CLICKHOUSE_CLIENT -nm --query "
CREATE TABLE concurrent_alter_detach_$i (key UInt64, value1 UInt8, value2 UInt8)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_detach', '$i')
ORDER BY key
SETTINGS
max_replicated_mutations_in_queue=1000,
number_of_free_entries_in_pool_to_execute_mutation=0,
max_replicated_merges_in_queue=1000,
temporary_directories_lifetime=10,
cleanup_delay_period=3,
cleanup_delay_period_random_add=0;
"
done
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_detach_1 SELECT number, number + 10, number from numbers(10)"
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_detach_1 SELECT number, number + 10, number from numbers(10, 40)"
$CLICKHOUSE_CLIENT -nm --query "
INSERT INTO concurrent_alter_detach_1 SELECT number, number + 10, number from numbers(10);
INSERT INTO concurrent_alter_detach_1 SELECT number, number + 10, number from numbers(10, 40);
"
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_detach_$i"
@ -31,12 +44,11 @@ INITIAL_SUM=$($CLICKHOUSE_CLIENT --query "SELECT SUM(value1) FROM concurrent_alt
function correct_alter_thread()
{
TYPES=(Float64 String UInt8 UInt32)
while true; do
REPLICA=$(($RANDOM % 3 + 1))
TYPE=${TYPES[$RANDOM % ${#TYPES[@]} ]}
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_$REPLICA MODIFY COLUMN value1 $TYPE SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 3 + 1))
TYPE=${TYPES[$RANDOM % ${#TYPES[@]} ]}
# additionally we don't wait for anything, for heavier concurrency
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_$REPLICA MODIFY COLUMN value1 $TYPE SETTINGS replication_alter_partitions_sync=0"
sleep 0.$RANDOM
}
# This thread adds some data to the table. After we finish we can check that
@ -44,43 +56,38 @@ function correct_alter_thread()
# insert queries will sometimes fail because of wrong types.
function insert_thread()
{
VALUES=(7.0 7 '7')
while true; do
REPLICA=$(($RANDOM % 3 + 1))
VALUE=${VALUES[$RANDOM % ${#VALUES[@]} ]}
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_detach_$REPLICA VALUES($RANDOM, $VALUE, $VALUE)"
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 3 + 1))
VALUE=${VALUES[$RANDOM % ${#VALUES[@]} ]}
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_detach_$REPLICA VALUES($RANDOM, $VALUE, $VALUE)"
sleep 0.$RANDOM
}
function detach_attach_thread()
{
while true; do
REPLICA=$(($RANDOM % 3 + 1))
$CLICKHOUSE_CLIENT --query "DETACH TABLE concurrent_alter_detach_$REPLICA"
sleep 0.$RANDOM
$CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_detach_$REPLICA"
done
REPLICA=$(($RANDOM % 3 + 1))
$CLICKHOUSE_CLIENT --query "DETACH TABLE concurrent_alter_detach_$REPLICA"
sleep 0.$RANDOM
$CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_detach_$REPLICA"
}
echo "Starting alters"
export -f correct_alter_thread;
export -f insert_thread;
export -f detach_attach_thread;
export -f correct_alter_thread
export -f insert_thread
export -f detach_attach_thread
TIMEOUT=15
# Sometimes we detach and attach tables
timeout $TIMEOUT bash -c detach_attach_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT detach_attach_thread 2> /dev/null &
timeout $TIMEOUT bash -c correct_alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT correct_alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
wait

View File

@ -14,7 +14,11 @@ for i in $(seq $REPLICAS); do
done
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_mt_$i (key UInt64, value1 UInt64, value2 Int32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_mt', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000"
$CLICKHOUSE_CLIENT -nm --query "
CREATE TABLE concurrent_alter_mt_$i (key UInt64, value1 UInt64, value2 Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_mt', '$i')
ORDER BY key
SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000"
done
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_mt_1 SELECT number, number + 10, number from numbers(10)"
@ -36,12 +40,10 @@ INITIAL_SUM=$($CLICKHOUSE_CLIENT --query "SELECT SUM(value1) FROM concurrent_alt
function correct_alter_thread()
{
TYPES=(Float64 String UInt8 UInt32)
while true; do
REPLICA=$(($RANDOM % 5 + 1))
TYPE=${TYPES[$RANDOM % ${#TYPES[@]} ]}
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_$REPLICA MODIFY COLUMN value1 $TYPE SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 5 + 1))
TYPE=${TYPES[$RANDOM % ${#TYPES[@]} ]}
$CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_$REPLICA MODIFY COLUMN value1 $TYPE SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency
sleep 0.$RANDOM
}
# This thread adds some data to the table. After we finish we can check that
@ -49,56 +51,49 @@ function correct_alter_thread()
# insert queries will sometimes fail because of wrong types.
function insert_thread()
{
VALUES=(7.0 7 '7')
while true; do
REPLICA=$(($RANDOM % 5 + 1))
VALUE=${VALUES[$RANDOM % ${#VALUES[@]} ]}
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_mt_$REPLICA VALUES($RANDOM, $VALUE, $VALUE)"
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 5 + 1))
VALUE=${VALUES[$RANDOM % ${#VALUES[@]} ]}
$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_mt_$REPLICA VALUES($RANDOM, $VALUE, $VALUE)"
sleep 0.$RANDOM
}
# Some select load, to make sure that our selects work during concurrent execution with alters
function select_thread()
{
while true; do
REPLICA=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) FROM concurrent_alter_mt_$REPLICA" 1>/dev/null
sleep 0.$RANDOM
done
REPLICA=$(($RANDOM % 5 + 1))
$CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) FROM concurrent_alter_mt_$REPLICA" 1>/dev/null
sleep 0.$RANDOM
}
echo "Starting alters"
export -f correct_alter_thread;
export -f insert_thread;
export -f select_thread;
export -f correct_alter_thread
export -f insert_thread
export -f select_thread
TIMEOUT=30
# Selects should run successfully
timeout $TIMEOUT bash -c select_thread &
timeout $TIMEOUT bash -c select_thread &
timeout $TIMEOUT bash -c select_thread &
clickhouse_client_loop_timeout $TIMEOUT select_thread &
clickhouse_client_loop_timeout $TIMEOUT select_thread &
clickhouse_client_loop_timeout $TIMEOUT select_thread &
clickhouse_client_loop_timeout $TIMEOUT correct_alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT correct_alter_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT correct_alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c correct_alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c correct_alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c correct_alter_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
timeout $TIMEOUT bash -c insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT insert_thread 2> /dev/null &
wait

View File

@ -18,6 +18,6 @@ while [[ $i -lt $retries ]]; do
# 10 is less than 20 seconds (20 streams), but long enough to cover possible load peaks
# "$@" is left to pass manual options (like --experimental_use_processors 0) during manual testing
timeout 10s ${CLICKHOUSE_CLIENT} "${opts[@]}" "$@" && break
clickhouse_client_timeout 10s ${CLICKHOUSE_CLIENT} "${opts[@]}" "$@" && break
((++i))
done
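The snippet above is the retry wrapper these long queries run under: each attempt gets a 10 second budget, and the loop stops on the first success. Reduced to its skeleton (the query and the retry count here are placeholders):

# Retry skeleton: up to $retries attempts, each capped at 10 seconds.
i=0
retries=5
while [[ $i -lt $retries ]]; do
    clickhouse_client_timeout 10s ${CLICKHOUSE_CLIENT} -q "SELECT 1" && break
    ((++i))
done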

View File

@ -9,54 +9,53 @@ set -e
function thread1()
{
while true; do
$CLICKHOUSE_CLIENT -q "INSERT INTO concurrent_optimize_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(10000)";
done
$CLICKHOUSE_CLIENT -q "INSERT INTO concurrent_optimize_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(10000)"
}
function thread2()
{
while true; do
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE concurrent_optimize_table FINAL";
sleep 0.$RANDOM;
done
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE concurrent_optimize_table FINAL"
sleep 0.$RANDOM
}
function thread3()
{
while true; do
$CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS concurrent_optimize_table;
CREATE TABLE concurrent_optimize_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_optimize_table', '1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;";
sleep 0.$RANDOM;
sleep 0.$RANDOM;
sleep 0.$RANDOM;
done
$CLICKHOUSE_CLIENT -mn -q "
DROP TABLE IF EXISTS concurrent_optimize_table;
CREATE TABLE concurrent_optimize_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16))
ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_optimize_table', '1')
ORDER BY a
PARTITION BY b % 10
SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;";
sleep 0.$RANDOM
sleep 0.$RANDOM
sleep 0.$RANDOM
}
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread1
export -f thread2
export -f thread3
TIMEOUT=15
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread3 2> /dev/null &
wait

View File

@ -14,35 +14,29 @@ done
function rename_thread_1()
{
while true; do
$CLICKHOUSE_CLIENT -q "RENAME TABLE replica_01108_1 TO replica_01108_1_tmp,
replica_01108_2 TO replica_01108_2_tmp,
replica_01108_3 TO replica_01108_3_tmp,
replica_01108_4 TO replica_01108_4_tmp";
sleep 0.$RANDOM;
done
$CLICKHOUSE_CLIENT -q "RENAME TABLE replica_01108_1 TO replica_01108_1_tmp,
replica_01108_2 TO replica_01108_2_tmp,
replica_01108_3 TO replica_01108_3_tmp,
replica_01108_4 TO replica_01108_4_tmp"
sleep 0.$RANDOM
}
function rename_thread_2()
{
while true; do
$CLICKHOUSE_CLIENT -q "RENAME TABLE replica_01108_1_tmp TO replica_01108_2,
replica_01108_2_tmp TO replica_01108_3,
replica_01108_3_tmp TO replica_01108_4,
replica_01108_4_tmp TO replica_01108_1";
sleep 0.$RANDOM;
done
$CLICKHOUSE_CLIENT -q "RENAME TABLE replica_01108_1_tmp TO replica_01108_2,
replica_01108_2_tmp TO replica_01108_3,
replica_01108_3_tmp TO replica_01108_4,
replica_01108_4_tmp TO replica_01108_1"
sleep 0.$RANDOM
}
function restart_replicas_loop()
{
while true; do
for i in $(seq 4); do
$CLICKHOUSE_CLIENT -q "SYSTEM RESTART REPLICA replica_01108_${i}";
$CLICKHOUSE_CLIENT -q "SYSTEM RESTART REPLICA replica_01108_${i}_tmp";
done
sleep 0.$RANDOM;
for i in $(seq 4); do
$CLICKHOUSE_CLIENT -q "SYSTEM RESTART REPLICA replica_01108_${i}"
$CLICKHOUSE_CLIENT -q "SYSTEM RESTART REPLICA replica_01108_${i}_tmp"
done
sleep 0.$RANDOM
}
function restart_thread_1()
{
@ -54,17 +48,17 @@ function restart_thread_2()
restart_replicas_loop
}
export -f rename_thread_1;
export -f rename_thread_2;
export -f restart_thread_1;
export -f restart_thread_2;
export -f rename_thread_1
export -f rename_thread_2
export -f restart_thread_1
export -f restart_thread_2
TIMEOUT=10
timeout $TIMEOUT bash -c rename_thread_1 2> /dev/null &
timeout $TIMEOUT bash -c rename_thread_2 2> /dev/null &
timeout $TIMEOUT bash -c restart_thread_1 2> /dev/null &
timeout $TIMEOUT bash -c restart_thread_2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT rename_thread_1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT rename_thread_2 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT restart_thread_1 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT restart_thread_2 2> /dev/null &
wait

View File

@ -1,5 +1,6 @@
#!/usr/bin/env bash
# Tags: no-parallel
# Tags: no-parallel, no-fasttest
# Tag no-fasttest: takes 45 seconds to run
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -13,31 +13,30 @@ $CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01150"
$CLICKHOUSE_CLIENT --query "CREATE TABLE test_01150.t1 (x UInt64, s Array(Nullable(String))) ENGINE = Memory"
$CLICKHOUSE_CLIENT --query "CREATE TABLE test_01150.t2 (x UInt64, s Array(Nullable(String))) ENGINE = Memory"
function thread_detach_attach {
while true; do
$CLICKHOUSE_CLIENT --query "DETACH DATABASE test_01150" 2>&1 | grep -v -F -e 'Received exception from server' -e 'Code: 219' -e '(query: '
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "ATTACH DATABASE test_01150" 2>&1 | grep -v -F -e 'Received exception from server' -e 'Code: 82' -e '(query: '
sleep 0.0$RANDOM
done
function thread_detach_attach()
{
$CLICKHOUSE_CLIENT --query "DETACH DATABASE test_01150" 2>&1 | grep -v -F -e 'Received exception from server' -e 'Code: 219' -e '(query: '
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "ATTACH DATABASE test_01150" 2>&1 | grep -v -F -e 'Received exception from server' -e 'Code: 82' -e '(query: '
sleep 0.0$RANDOM
}
function thread_rename {
while true; do
$CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t1 TO test_01150.t2_tmp, test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F -e 'Received exception from server' -e '(query: ' | grep -v -P 'Code: (81|60|57|521)'
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F -e 'Received exception from server' -e '(query: ' | grep -v -P 'Code: (81|60|57|521)'
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F -e 'Received exception from server' -e '(query: ' | grep -v -P 'Code: (81|60|57|521)'
sleep 0.0$RANDOM
done
function thread_rename()
{
$CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t1 TO test_01150.t2_tmp, test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F -e 'Received exception from server' -e '(query: ' | grep -v -P 'Code: (81|60|57|521)'
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F -e 'Received exception from server' -e '(query: ' | grep -v -P 'Code: (81|60|57|521)'
sleep 0.0$RANDOM
$CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F -e 'Received exception from server' -e '(query: ' | grep -v -P 'Code: (81|60|57|521)'
sleep 0.0$RANDOM
}
export -f thread_detach_attach
export -f thread_rename
timeout 20 bash -c "thread_detach_attach" &
timeout 20 bash -c 'thread_rename' &
clickhouse_client_loop_timeout 20 thread_detach_attach &
clickhouse_client_loop_timeout 20 thread_rename &
wait
sleep 1
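The grep -v filters in these two threads drop error messages that are expected while the database is being detached or the tables renamed concurrently (the listed codes correspond to states that are normal mid-race, such as a database or table that is temporarily missing or already present), so only unexpected output reaches the test log. The filtering idiom by itself:

# Suppress expected concurrent-DDL errors; anything else will show up in the output.
$CLICKHOUSE_CLIENT --query "DETACH DATABASE test_01150" 2>&1 \
    | grep -v -F -e 'Received exception from server' -e 'Code: 219' -e '(query: '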

View File

@ -26,99 +26,85 @@ wait
#function create_drop_thread()
#{
# while true; do
# REPLICA=$(($RANDOM % 16))
# $CLICKHOUSE_CLIENT -q "DROP TABLE src_$REPLICA;"
# arr=("$@")
# engine=${arr[$RANDOM % ${#arr[@]}]}
# $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$REPLICA (p UInt64, k UInt64, v UInt64) ENGINE=$engine PARTITION BY p % 10 ORDER BY k"
# sleep 0.$RANDOM;
# done
# REPLICA=$(($RANDOM % 16))
# $CLICKHOUSE_CLIENT -q "DROP TABLE src_$REPLICA;"
# arr=("$@")
# engine=${arr[$RANDOM % ${#arr[@]}]}
# $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$REPLICA (p UInt64, k UInt64, v UInt64) ENGINE=$engine PARTITION BY p % 10 ORDER BY k"
# sleep 0.$RANDOM
#}
function insert_thread()
{
while true; do
REPLICA=$(($RANDOM % 16))
LIMIT=$(($RANDOM % 100))
$CLICKHOUSE_CLIENT -q "INSERT INTO $1_$REPLICA SELECT * FROM generateRandom('p UInt64, k UInt64, v UInt64') LIMIT $LIMIT" 2>/dev/null
done
REPLICA=$(($RANDOM % 16))
LIMIT=$(($RANDOM % 100))
$CLICKHOUSE_CLIENT -q "INSERT INTO $1_$REPLICA SELECT * FROM generateRandom('p UInt64, k UInt64, v UInt64') LIMIT $LIMIT" 2>/dev/null
}
function move_partition_src_dst_thread()
{
while true; do
FROM_REPLICA=$(($RANDOM % 16))
TO_REPLICA=$(($RANDOM % 16))
PARTITION=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE src_$FROM_REPLICA MOVE PARTITION $PARTITION TO TABLE dst_$TO_REPLICA" 2>/dev/null
sleep 0.$RANDOM;
done
FROM_REPLICA=$(($RANDOM % 16))
TO_REPLICA=$(($RANDOM % 16))
PARTITION=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE src_$FROM_REPLICA MOVE PARTITION $PARTITION TO TABLE dst_$TO_REPLICA" 2>/dev/null
sleep 0.$RANDOM
}
function replace_partition_src_src_thread()
{
while true; do
FROM_REPLICA=$(($RANDOM % 16))
TO_REPLICA=$(($RANDOM % 16))
PARTITION=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE src_$TO_REPLICA REPLACE PARTITION $PARTITION FROM src_$FROM_REPLICA" 2>/dev/null
sleep 0.$RANDOM;
done
FROM_REPLICA=$(($RANDOM % 16))
TO_REPLICA=$(($RANDOM % 16))
PARTITION=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE src_$TO_REPLICA REPLACE PARTITION $PARTITION FROM src_$FROM_REPLICA" 2>/dev/null
sleep 0.$RANDOM
}
function drop_partition_thread()
{
while true; do
REPLICA=$(($RANDOM % 16))
PARTITION=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$REPLICA DROP PARTITION $PARTITION" 2>/dev/null
sleep 0.$RANDOM;
done
REPLICA=$(($RANDOM % 16))
PARTITION=$(($RANDOM % 10))
$CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$REPLICA DROP PARTITION $PARTITION" 2>/dev/null
sleep 0.$RANDOM
}
function optimize_thread()
{
while true; do
REPLICA=$(($RANDOM % 16))
TABLE="src"
if (( RANDOM % 2 )); then
TABLE="dst"
fi
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE ${TABLE}_$REPLICA" 2>/dev/null
sleep 0.$RANDOM;
done
REPLICA=$(($RANDOM % 16))
TABLE="src"
if (( RANDOM % 2 )); then
TABLE="dst"
fi
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE ${TABLE}_$REPLICA" 2>/dev/null
sleep 0.$RANDOM
}
function drop_part_thread()
{
while true; do
REPLICA=$(($RANDOM % 16))
part=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.parts WHERE active AND database='$CLICKHOUSE_DATABASE' and table='dst_$REPLICA' ORDER BY rand() LIMIT 1")
$CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$REPLICA DROP PART '$part'" 2>/dev/null
sleep 0.$RANDOM;
done
REPLICA=$(($RANDOM % 16))
part=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.parts WHERE active AND database='$CLICKHOUSE_DATABASE' and table='dst_$REPLICA' ORDER BY rand() LIMIT 1")
$CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$REPLICA DROP PART '$part'" 2>/dev/null
sleep 0.$RANDOM
}
#export -f create_drop_thread;
export -f insert_thread;
export -f move_partition_src_dst_thread;
export -f replace_partition_src_src_thread;
export -f drop_partition_thread;
export -f optimize_thread;
export -f drop_part_thread;
export -f insert_thread
export -f move_partition_src_dst_thread
export -f replace_partition_src_src_thread
export -f drop_partition_thread
export -f optimize_thread
export -f drop_part_thread
TIMEOUT=60
#timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" &
timeout $TIMEOUT bash -c 'insert_thread src' &
timeout $TIMEOUT bash -c 'insert_thread src' &
timeout $TIMEOUT bash -c 'insert_thread dst' &
timeout $TIMEOUT bash -c move_partition_src_dst_thread &
timeout $TIMEOUT bash -c replace_partition_src_src_thread &
timeout $TIMEOUT bash -c drop_partition_thread &
timeout $TIMEOUT bash -c optimize_thread &
timeout $TIMEOUT bash -c drop_part_thread &
#clickhouse_client_loop_timeout $TIMEOUT "create_drop_thread ${engines[@]}" &
clickhouse_client_loop_timeout $TIMEOUT insert_thread src &
clickhouse_client_loop_timeout $TIMEOUT insert_thread src &
clickhouse_client_loop_timeout $TIMEOUT insert_thread dst &
clickhouse_client_loop_timeout $TIMEOUT move_partition_src_dst_thread &
clickhouse_client_loop_timeout $TIMEOUT replace_partition_src_src_thread &
clickhouse_client_loop_timeout $TIMEOUT drop_partition_thread &
clickhouse_client_loop_timeout $TIMEOUT optimize_thread &
clickhouse_client_loop_timeout $TIMEOUT drop_part_thread &
wait
check_replication_consistency "dst_" "count(), sum(p), sum(k), sum(v)"
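check_replication_consistency comes from the shared test helpers and is not changed by this diff; conceptually it syncs every replica whose name starts with the given prefix and then checks that the requested aggregates agree across them. A rough sketch under that assumption (the real helper may differ):

# Hypothetical sketch: sync all replicas with the given prefix and compare aggregates.
function check_replication_consistency()
{
    local prefix=$1 aggregates=$2
    local expected actual
    expected=$($CLICKHOUSE_CLIENT -q "SELECT $aggregates FROM ${prefix}0")
    for i in $(seq 1 15); do
        $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA ${prefix}$i"
        actual=$($CLICKHOUSE_CLIENT -q "SELECT $aggregates FROM ${prefix}$i")
        [[ "$actual" == "$expected" ]] || echo "replica ${prefix}$i differs from ${prefix}0"
    done
}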

View File

@ -12,36 +12,30 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)"
function thread_insert()
{
while true; do
$CLICKHOUSE_CLIENT -q "insert into mt values (rand())";
done
$CLICKHOUSE_CLIENT -q "insert into mt values (rand())";
}
function thread_detach_attach()
{
while true; do
$CLICKHOUSE_CLIENT -q "alter table mt detach partition id 'all'";
$CLICKHOUSE_CLIENT -q "alter table mt attach partition id 'all'";
done
$CLICKHOUSE_CLIENT -q "alter table mt detach partition id 'all'";
$CLICKHOUSE_CLIENT -q "alter table mt attach partition id 'all'";
}
function thread_drop_detached()
{
while true; do
$CLICKHOUSE_CLIENT --allow_drop_detached -q "alter table mt drop detached partition id 'all'";
done
$CLICKHOUSE_CLIENT --allow_drop_detached -q "alter table mt drop detached partition id 'all'";
}
export -f thread_insert;
export -f thread_detach_attach;
export -f thread_drop_detached;
export -f thread_insert
export -f thread_detach_attach
export -f thread_drop_detached
TIMEOUT=10
timeout $TIMEOUT bash -c thread_insert &
timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null &
timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null &
timeout $TIMEOUT bash -c thread_drop_detached 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread_insert &
clickhouse_client_loop_timeout $TIMEOUT thread_detach_attach 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread_detach_attach 2> /dev/null &
clickhouse_client_loop_timeout $TIMEOUT thread_drop_detached 2> /dev/null &
wait

Some files were not shown because too many files have changed in this diff.