mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Merge branch 'master' into backport-by-label
This commit is contained in:
commit
c13cce7890
@ -25,7 +25,7 @@
|
||||
|
||||
|
||||
#if defined(__PPC__)
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
#endif
|
||||
@ -1266,7 +1266,7 @@ public:
|
||||
};
|
||||
|
||||
#if defined(__PPC__)
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
@ -1,9 +1,9 @@
|
||||
# This strings autochanged from release_lib.sh:
|
||||
SET(VERSION_REVISION 54450)
|
||||
SET(VERSION_REVISION 54451)
|
||||
SET(VERSION_MAJOR 21)
|
||||
SET(VERSION_MINOR 5)
|
||||
SET(VERSION_MINOR 6)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH 3827789b3d8fd2021952e57e5110343d26daa1a1)
|
||||
SET(VERSION_DESCRIBE v21.5.1.1-prestable)
|
||||
SET(VERSION_STRING 21.5.1.1)
|
||||
SET(VERSION_GITHASH 96fced4c3cf432fb0b401d2ab01f0c56e5f74a96)
|
||||
SET(VERSION_DESCRIBE v21.6.1.1-prestable)
|
||||
SET(VERSION_STRING 21.6.1.1)
|
||||
# end of autochange
|
||||
|
@ -171,6 +171,7 @@ elseif (COMPILER_GCC)
|
||||
add_cxx_compile_options(-Wtrampolines)
|
||||
# Obvious
|
||||
add_cxx_compile_options(-Wunused)
|
||||
add_cxx_compile_options(-Wundef)
|
||||
# Warn if vector operation is not implemented via SIMD capabilities of the architecture
|
||||
add_cxx_compile_options(-Wvector-operation-performance)
|
||||
# XXX: libstdc++ has some of these for 3way compare
|
||||
|
2
contrib/datasketches-cpp
vendored
2
contrib/datasketches-cpp
vendored
@ -1 +1 @@
|
||||
Subproject commit 45885c0c8c0807bb9480886d60ca7042000a4c43
|
||||
Subproject commit f915d35b2de676683493c86c585141a1e1c83334
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@ -1,5 +1,5 @@
|
||||
clickhouse (21.5.1.1) unstable; urgency=low
|
||||
clickhouse (21.6.1.1) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Fri, 02 Apr 2021 18:34:26 +0300
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 20 Apr 2021 01:48:16 +0300
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=21.5.1.*
|
||||
ARG version=21.6.1.*
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=21.5.1.*
|
||||
ARG version=21.6.1.*
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=21.5.1.*
|
||||
ARG version=21.6.1.*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -312,8 +312,6 @@ function run_tests
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
01798_uniq_theta_sketch
|
||||
01799_long_uniq_theta_sketch
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
|
@ -159,6 +159,9 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.<so|dl
|
||||
<models_config>/home/catboost/models/*_model.xml</models_config>
|
||||
```
|
||||
|
||||
!!! note "Note"
|
||||
You can change the path to the CatBoost model configuration later without restarting the server.
|
||||
|
||||
## 4. Run the Model Inference from SQL {#run-model-inference}
|
||||
|
||||
For test model run the ClickHouse client `$ clickhouse client`.
|
||||
|
@ -4,7 +4,9 @@ Contains information about columns in all the tables.
|
||||
|
||||
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
|
||||
|
||||
The `system.columns` table contains the following columns (the column type is shown in brackets):
|
||||
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in the session where they were created. They are shown with an empty `database` field.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
@ -26,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
:) select * from system.columns LIMIT 2 FORMAT Vertical;
|
||||
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
@ -65,8 +67,6 @@ is_in_sorting_key: 0
|
||||
is_in_primary_key: 0
|
||||
is_in_sampling_key: 0
|
||||
compression_codec:
|
||||
|
||||
2 rows in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->
|
||||
|
@ -1,59 +1,65 @@
|
||||
# system.tables {#system-tables}
|
||||
|
||||
Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
|
||||
Contains metadata of each table that the server knows about.
|
||||
|
||||
This table contains the following columns (the column type is shown in brackets):
|
||||
[Detached](../../sql-reference/statements/detach.md) tables are not shown in `system.tables`.
|
||||
|
||||
- `database` (String) — The name of the database the table is in.
|
||||
[Temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.tables` only in the session where they were created. They are shown with an empty `database` field and with the `is_temporary` flag switched on.
|
||||
|
||||
- `name` (String) — Table name.
|
||||
Columns:
|
||||
|
||||
- `engine` (String) — Table engine name (without parameters).
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
|
||||
|
||||
- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
|
||||
- `data_path` (String) - Path to the table data in the file system.
|
||||
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters).
|
||||
|
||||
- `metadata_path` (String) - Path to the table metadata in the file system.
|
||||
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary.
|
||||
|
||||
- `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
|
||||
- `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system.
|
||||
|
||||
- `dependencies_database` (Array(String)) - Database dependencies.
|
||||
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system.
|
||||
|
||||
- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
|
||||
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata.
|
||||
|
||||
- `create_table_query` (String) - The query that was used to create the table.
|
||||
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies.
|
||||
|
||||
- `engine_full` (String) - Parameters of the table engine.
|
||||
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
|
||||
|
||||
- `partition_key` (String) - The partition key expression specified in the table.
|
||||
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table.
|
||||
|
||||
- `sorting_key` (String) - The sorting key expression specified in the table.
|
||||
- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine.
|
||||
|
||||
- `primary_key` (String) - The primary key expression specified in the table.
|
||||
- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table.
|
||||
|
||||
- `sampling_key` (String) - The sampling key expression specified in the table.
|
||||
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table.
|
||||
|
||||
- `storage_policy` (String) - The storage policy:
|
||||
- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table.
|
||||
|
||||
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table.
|
||||
|
||||
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy:
|
||||
|
||||
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
|
||||
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
|
||||
|
||||
- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `Null` (including underying `Buffer` table).
|
||||
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).
|
||||
|
||||
- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `Null` (**does not** includes any underlying storage).
|
||||
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).
|
||||
|
||||
- If the table stores data on disk, returns used space on disk (i.e. compressed).
|
||||
- If the table stores data in memory, returns approximated number of used bytes in memory.
|
||||
|
||||
- `lifetime_rows` (Nullable(UInt64)) - Total number of rows INSERTed since server start (only for `Buffer` tables).
|
||||
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables).
|
||||
|
||||
- `lifetime_bytes` (Nullable(UInt64)) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
|
||||
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
|
||||
|
||||
The `system.tables` table is used in `SHOW TABLES` query implementation.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
:) SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
|
||||
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
@ -100,8 +106,6 @@ sampling_key:
|
||||
storage_policy:
|
||||
total_rows: ᴺᵁᴸᴸ
|
||||
total_bytes: ᴺᵁᴸᴸ
|
||||
|
||||
2 rows in set. Elapsed: 0.004 sec.
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) <!--hide-->
|
||||
|
@ -6,7 +6,7 @@ toc_priority: 207
|
||||
|
||||
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm.
|
||||
|
||||
The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
|
||||
Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
|
||||
|
||||
The performance of the function is lower than performance of [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile) or [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.
|
||||
|
||||
|
@ -38,4 +38,3 @@ We recommend using this function in almost all scenarios.
|
||||
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -49,4 +49,3 @@ Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq
|
||||
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -23,4 +23,3 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
|
||||
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
|
||||
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -37,4 +37,3 @@ We don’t recommend using this function. In most cases, use the [uniq](../../..
|
||||
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
|
||||
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
||||
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
|
||||
|
@ -1,39 +0,0 @@
|
||||
---
|
||||
toc_priority: 195
|
||||
---
|
||||
|
||||
# uniqThetaSketch {#agg_function-uniqthetasketch}
|
||||
|
||||
Calculates the approximate number of different argument values, using the [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html).
|
||||
|
||||
``` sql
|
||||
uniqThetaSketch(x[, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Function:
|
||||
|
||||
- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
|
||||
|
||||
- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values.
|
||||
|
||||
4096 (2^12) 64-bit sketches are used. The size of the state is about 41 KB.
|
||||
|
||||
- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for detail.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
|
||||
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
|
||||
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
|
||||
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
|
||||
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
|
@ -16,46 +16,60 @@ The following assumptions are made:
|
||||
|
||||
## visitParamHas(params, name) {#visitparamhasparams-name}
|
||||
|
||||
Checks whether there is a field with the ‘name’ name.
|
||||
Checks whether there is a field named `name`.
|
||||
|
||||
Alias: `simpleJSONHas`.
|
||||
|
||||
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
|
||||
|
||||
Parses UInt64 from the value of the field named ‘name’. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
|
||||
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
|
||||
|
||||
Alias: `simpleJSONExtractUInt`.
|
||||
|
||||
## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
|
||||
|
||||
The same as for Int64.
|
||||
|
||||
Alias: `simpleJSONExtractInt`.
|
||||
|
||||
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
|
||||
|
||||
The same as for Float64.
|
||||
|
||||
Alias: `simpleJSONExtractFloat`.
|
||||
|
||||
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
|
||||
|
||||
Parses a true/false value. The result is UInt8.
|
||||
|
||||
Alias: `simpleJSONExtractBool`.
|
||||
|
||||
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
|
||||
|
||||
Returns the value of a field, including separators.
|
||||
|
||||
Alias: `simpleJSONExtractRaw`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
|
||||
```
|
||||
|
||||
## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
|
||||
|
||||
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
|
||||
|
||||
Alias: `simpleJSONExtractString`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = '';
|
||||
```
|
||||
|
||||
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
|
||||
|
@ -74,6 +74,9 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,
|
||||
|
||||
Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.
|
||||
|
||||
!!! warning "Warning"
|
||||
You can’t delete a column if it is referenced by a [materialized view](../../../sql-reference/statements/create/view.md#materialized). An attempt to do so returns an error.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
@ -180,7 +183,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
|
||||
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
## See Also
|
||||
**See Also**
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
|
@ -158,7 +158,9 @@ FROM amazon_train
|
||||
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
|
||||
<models_config>/home/catboost/models/*_model.xml</models_config>
|
||||
```
|
||||
|
||||
!!! note "Примечание"
|
||||
Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
|
||||
|
||||
## 4. Запустите вывод модели из SQL {#run-model-inference}
|
||||
|
||||
Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.
|
||||
|
@ -4,7 +4,9 @@
|
||||
|
||||
С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу.
|
||||
|
||||
Таблица `system.columns` содержит столбцы (тип столбца указан в скобках):
|
||||
Колонки [временных таблиц](../../sql-reference/statements/create/table.md#temporary-tables) содержатся в `system.columns` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких колонок пустое.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
|
||||
@ -23,3 +25,46 @@
|
||||
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ выборки.
|
||||
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — имя кодека сжатия.
|
||||
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
database: system
|
||||
table: aggregate_function_combinators
|
||||
name: name
|
||||
type: String
|
||||
default_kind:
|
||||
default_expression:
|
||||
data_compressed_bytes: 0
|
||||
data_uncompressed_bytes: 0
|
||||
marks_bytes: 0
|
||||
comment:
|
||||
is_in_partition_key: 0
|
||||
is_in_sorting_key: 0
|
||||
is_in_primary_key: 0
|
||||
is_in_sampling_key: 0
|
||||
compression_codec:
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
database: system
|
||||
table: aggregate_function_combinators
|
||||
name: is_internal
|
||||
type: UInt8
|
||||
default_kind:
|
||||
default_expression:
|
||||
data_compressed_bytes: 0
|
||||
data_uncompressed_bytes: 0
|
||||
marks_bytes: 0
|
||||
comment:
|
||||
is_in_partition_key: 0
|
||||
is_in_sorting_key: 0
|
||||
is_in_primary_key: 0
|
||||
is_in_sampling_key: 0
|
||||
compression_codec:
|
||||
```
|
||||
|
@ -1,39 +1,94 @@
|
||||
# system.tables {#system-tables}
|
||||
|
||||
Содержит метаданные каждой таблицы, о которой знает сервер. Отсоединённые таблицы не отображаются в `system.tables`.
|
||||
Содержит метаданные каждой таблицы, о которой знает сервер.
|
||||
|
||||
Эта таблица содержит следующие столбцы (тип столбца показан в скобках):
|
||||
Отсоединённые таблицы ([DETACH](../../sql-reference/statements/detach.md)) не отображаются в `system.tables`.
|
||||
|
||||
- `database String` — имя базы данных, в которой находится таблица.
|
||||
- `name` (String) — имя таблицы.
|
||||
- `engine` (String) — движок таблицы (без параметров).
|
||||
- `is_temporary` (UInt8) — флаг, указывающий на то, временная это таблица или нет.
|
||||
- `data_path` (String) — путь к данным таблицы в файловой системе.
|
||||
- `metadata_path` (String) — путь к табличным метаданным в файловой системе.
|
||||
- `metadata_modification_time` (DateTime) — время последней модификации табличных метаданных.
|
||||
- `dependencies_database` (Array(String)) — зависимости базы данных.
|
||||
- `dependencies_table` (Array(String)) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
|
||||
- `create_table_query` (String) — запрос, которым создавалась таблица.
|
||||
- `engine_full` (String) — параметры табличного движка.
|
||||
- `partition_key` (String) — ключ партиционирования таблицы.
|
||||
- `sorting_key` (String) — ключ сортировки таблицы.
|
||||
- `primary_key` (String) - первичный ключ таблицы.
|
||||
- `sampling_key` (String) — ключ сэмплирования таблицы.
|
||||
- `storage_policy` (String) - политика хранения данных:
|
||||
Информация о [временных таблицах](../../sql-reference/statements/create/table.md#temporary-tables) содержится в `system.tables` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких таблиц пустое, а флаг `is_temporary` включен.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
|
||||
- `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров).
|
||||
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет.
|
||||
- `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе.
|
||||
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе.
|
||||
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время последней модификации табличных метаданных.
|
||||
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — зависимости базы данных.
|
||||
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
|
||||
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) — запрос, при помощи которого создавалась таблица.
|
||||
- `engine_full` ([String](../../sql-reference/data-types/string.md)) — параметры табличного движка.
|
||||
- `partition_key` ([String](../../sql-reference/data-types/string.md)) — ключ партиционирования таблицы.
|
||||
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) — ключ сортировки таблицы.
|
||||
- `primary_key` ([String](../../sql-reference/data-types/string.md)) - первичный ключ таблицы.
|
||||
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) — ключ сэмплирования таблицы.
|
||||
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - политика хранения данных:
|
||||
|
||||
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
|
||||
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
|
||||
|
||||
- `total_rows` (Nullable(UInt64)) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `Null` (включая базовую таблицу `Buffer`).
|
||||
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `NULL` (включая базовую таблицу `Buffer`).
|
||||
|
||||
- `total_bytes` (Nullable(UInt64)) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `Null` (**не включает** в себя никакого базового хранилища).
|
||||
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `NULL` (не включает в себя никакого базового хранилища).
|
||||
|
||||
- Если таблица хранит данные на диске, возвращает используемое пространство на диске (т. е. сжатое).
|
||||
- Если таблица хранит данные в памяти, возвращает приблизительное количество используемых байт в памяти.
|
||||
|
||||
- `lifetime_rows` (Nullable(UInt64)) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
|
||||
- `lifetime_bytes` (Nullable(UInt64)) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
|
||||
Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`.
|
||||
|
||||
**Пример**
|
||||
|
||||
```sql
|
||||
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
database: system
|
||||
name: aggregate_function_combinators
|
||||
uuid: 00000000-0000-0000-0000-000000000000
|
||||
engine: SystemAggregateFunctionCombinators
|
||||
is_temporary: 0
|
||||
data_paths: []
|
||||
metadata_path: /var/lib/clickhouse/metadata/system/aggregate_function_combinators.sql
|
||||
metadata_modification_time: 1970-01-01 03:00:00
|
||||
dependencies_database: []
|
||||
dependencies_table: []
|
||||
create_table_query:
|
||||
engine_full:
|
||||
partition_key:
|
||||
sorting_key:
|
||||
primary_key:
|
||||
sampling_key:
|
||||
storage_policy:
|
||||
total_rows: ᴺᵁᴸᴸ
|
||||
total_bytes: ᴺᵁᴸᴸ
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
database: system
|
||||
name: asynchronous_metrics
|
||||
uuid: 00000000-0000-0000-0000-000000000000
|
||||
engine: SystemAsynchronousMetrics
|
||||
is_temporary: 0
|
||||
data_paths: []
|
||||
metadata_path: /var/lib/clickhouse/metadata/system/asynchronous_metrics.sql
|
||||
metadata_modification_time: 1970-01-01 03:00:00
|
||||
dependencies_database: []
|
||||
dependencies_table: []
|
||||
create_table_query:
|
||||
engine_full:
|
||||
partition_key:
|
||||
sorting_key:
|
||||
primary_key:
|
||||
sampling_key:
|
||||
storage_policy:
|
||||
total_rows: ᴺᵁᴸᴸ
|
||||
total_bytes: ᴺᵁᴸᴸ
|
||||
```
|
||||
|
@ -16,51 +16,65 @@ toc_title: JSON
|
||||
|
||||
## visitParamHas(params, name) {#visitparamhasparams-name}
|
||||
|
||||
Проверить наличие поля с именем name.
|
||||
Проверяет наличие поля с именем `name`.
|
||||
|
||||
Алиас: `simpleJSONHas`.
|
||||
|
||||
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
|
||||
|
||||
Распарсить UInt64 из значения поля с именем name. Если поле строковое - попытаться распарсить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то вернуть 0.
|
||||
Пытается выделить число типа UInt64 из значения поля с именем `name`. Если поле строковое, пытается выделить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то возвращает 0.
|
||||
|
||||
Алиас: `simpleJSONExtractUInt`.
|
||||
|
||||
## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
|
||||
|
||||
Аналогично для Int64.
|
||||
|
||||
Алиас: `simpleJSONExtractInt`.
|
||||
|
||||
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
|
||||
|
||||
Аналогично для Float64.
|
||||
|
||||
Алиас: `simpleJSONExtractFloat`.
|
||||
|
||||
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
|
||||
|
||||
Распарсить значение true/false. Результат - UInt8.
|
||||
Пытается выделить значение true/false. Результат — UInt8.
|
||||
|
||||
Алиас: `simpleJSONExtractBool`.
|
||||
|
||||
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
|
||||
|
||||
Вернуть значение поля, включая разделители.
|
||||
Возвращает значение поля, включая разделители.
|
||||
|
||||
Алиас: `simpleJSONExtractRaw`.
|
||||
|
||||
Примеры:
|
||||
|
||||
``` sql
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
|
||||
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
|
||||
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
|
||||
```
|
||||
|
||||
## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
|
||||
|
||||
Распарсить строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
|
||||
Разбирает строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
|
||||
|
||||
Алиас: `simpleJSONExtractString`.
|
||||
|
||||
Примеры:
|
||||
|
||||
``` sql
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = ''
|
||||
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
|
||||
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
|
||||
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
|
||||
visitParamExtractString('{"abc":"hello}', 'abc') = '';
|
||||
```
|
||||
|
||||
На данный момент, не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).
|
||||
На данный момент не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).
|
||||
|
||||
Следующие функции используют [simdjson](https://github.com/lemire/simdjson) который разработан под более сложные требования для разбора JSON. Упомянутое выше предположение 2 по-прежнему применимо.
|
||||
Следующие функции используют [simdjson](https://github.com/lemire/simdjson), который разработан под более сложные требования для разбора JSON. Упомянутое выше допущение 2 по-прежнему применимо.
|
||||
|
||||
## isValidJSON(json) {#isvalidjsonjson}
|
||||
|
||||
@ -292,4 +306,3 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello"
|
||||
│ [('d','"hello"'),('f','"world"')] │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -63,6 +63,9 @@ DROP COLUMN [IF EXISTS] name
|
||||
|
||||
Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно.
|
||||
|
||||
!!! warning "Предупреждение"
|
||||
Вы не можете удалить столбец, используемый в [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
@ -155,7 +158,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
|
||||
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
## Смотрите также
|
||||
**Смотрите также**
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
|
@ -247,7 +247,7 @@ CREATE TABLE codec_example
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
```
|
||||
## Временные таблицы {#vremennye-tablitsy}
|
||||
## Временные таблицы {#temporary-tables}
|
||||
|
||||
ClickHouse поддерживает временные таблицы со следующими характеристиками:
|
||||
|
||||
|
@ -19,6 +19,7 @@ set (CLICKHOUSE_SERVER_LINK
|
||||
clickhouse_storages_system
|
||||
clickhouse_table_functions
|
||||
string_utils
|
||||
jemalloc
|
||||
|
||||
${LINK_RESOURCE_LIB}
|
||||
|
||||
|
@ -101,6 +101,10 @@
|
||||
# include <Server/KeeperTCPHandlerFactory.h>
|
||||
#endif
|
||||
|
||||
#if USE_JEMALLOC
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric Revision;
|
||||
@ -109,11 +113,35 @@ namespace CurrentMetrics
|
||||
extern const Metric MaxDDLEntryID;
|
||||
}
|
||||
|
||||
#if USE_JEMALLOC
|
||||
/// Queries the jemalloc option called `name` through mallctl() and returns it as a bool.
/// Throws Poco::SystemException when mallctl() returns a non-zero status.
static bool jemallocOptionEnabled(const char *name)
|
||||
{
|
||||
bool value;
|
||||
/// Passed to mallctl() by pointer together with `value` as the output buffer size.
size_t size = sizeof(value);
|
||||
|
||||
/// No new value is supplied (newp is null, newlen is 0), so this call only reads the option.
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
|
||||
throw Poco::SystemException("mallctl() failed");
|
||||
|
||||
return value;
|
||||
}
|
||||
#else
|
||||
/// Fallback used when the build has no jemalloc (the `#else` branch of USE_JEMALLOC):
/// the option name is ignored and every option is reported as disabled.
static bool jemallocOptionEnabled(const char *) { return false; }
|
||||
#endif
|
||||
|
||||
|
||||
int mainEntryClickHouseServer(int argc, char ** argv)
|
||||
{
|
||||
DB::Server app;
|
||||
|
||||
if (jemallocOptionEnabled("opt.background_thread"))
|
||||
{
|
||||
LOG_ERROR(&app.logger(),
|
||||
"jemalloc.background_thread was requested, "
|
||||
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
|
||||
"and also background_thread is not compatible with ClickHouse watchdog "
|
||||
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
|
||||
}
|
||||
|
||||
/// Do not fork separate process from watchdog if we attached to terminal.
|
||||
/// Otherwise it breaks gdb usage.
|
||||
/// Can be overridden by environment variable (cannot use server config at this moment).
|
||||
|
@ -132,12 +132,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
|
||||
|
||||
factory.registerFunction("uniqExact",
|
||||
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, properties});
|
||||
|
||||
#if USE_DATASKETCHES
|
||||
factory.registerFunction("uniqThetaSketch",
|
||||
{createAggregateFunctionUniq<AggregateFunctionUniqThetaSketchData, AggregateFunctionUniqThetaSketchData>, properties});
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,7 +22,6 @@
|
||||
|
||||
#include <AggregateFunctions/UniquesHashSet.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/ThetaSketchData.h>
|
||||
#include <AggregateFunctions/UniqVariadicHash.h>
|
||||
|
||||
|
||||
@ -125,19 +124,6 @@ struct AggregateFunctionUniqExactData<String>
|
||||
};
|
||||
|
||||
|
||||
/// uniqThetaSketch
|
||||
#if USE_DATASKETCHES
|
||||
|
||||
struct AggregateFunctionUniqThetaSketchData
|
||||
{
|
||||
using Set = ThetaSketchData<UInt64>;
|
||||
Set set;
|
||||
|
||||
static String getName() { return "uniqThetaSketch"; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
@ -203,12 +189,6 @@ struct OneAdder
|
||||
data.set.insert(key);
|
||||
}
|
||||
}
|
||||
#if USE_DATASKETCHES
|
||||
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqThetaSketchData>)
|
||||
{
|
||||
data.set.insertOriginal(column.getDataAt(row_num));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
@ -280,7 +280,7 @@ public:
|
||||
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -163,7 +163,7 @@ public:
|
||||
sorted = false;
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wclass-memaccess"
|
||||
#endif
|
||||
@ -191,7 +191,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -1,119 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
|
||||
#if USE_DATASKETCHES
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <memory>
|
||||
#include <theta_sketch.hpp> // Y_IGNORE
|
||||
#include <theta_union.hpp> // Y_IGNORE
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
template <typename Key>
|
||||
class ThetaSketchData : private boost::noncopyable
|
||||
{
|
||||
private:
|
||||
std::unique_ptr<datasketches::update_theta_sketch> sk_update;
|
||||
std::unique_ptr<datasketches::theta_union> sk_union;
|
||||
|
||||
inline datasketches::update_theta_sketch * getSkUpdate()
|
||||
{
|
||||
if (!sk_update)
|
||||
sk_update = std::make_unique<datasketches::update_theta_sketch>(datasketches::update_theta_sketch::builder().build());
|
||||
return sk_update.get();
|
||||
}
|
||||
|
||||
inline datasketches::theta_union * getSkUnion()
|
||||
{
|
||||
if (!sk_union)
|
||||
sk_union = std::make_unique<datasketches::theta_union>(datasketches::theta_union::builder().build());
|
||||
return sk_union.get();
|
||||
}
|
||||
|
||||
public:
|
||||
using value_type = Key;
|
||||
|
||||
ThetaSketchData() = default;
|
||||
~ThetaSketchData() = default;
|
||||
|
||||
/// Insert original value without hash, as `datasketches::update_theta_sketch.update` will do the hash internal.
|
||||
void insertOriginal(const StringRef & value)
|
||||
{
|
||||
getSkUpdate()->update(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
|
||||
void insert(Key value)
|
||||
{
|
||||
getSkUpdate()->update(value);
|
||||
}
|
||||
|
||||
UInt64 size() const
|
||||
{
|
||||
if (sk_union)
|
||||
return static_cast<UInt64>(sk_union->get_result().get_estimate());
|
||||
else if (sk_update)
|
||||
return static_cast<UInt64>(sk_update->get_estimate());
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void merge(const ThetaSketchData & rhs)
|
||||
{
|
||||
datasketches::theta_union * u = getSkUnion();
|
||||
|
||||
if (sk_update)
|
||||
{
|
||||
u->update(*sk_update);
|
||||
sk_update.reset(nullptr);
|
||||
}
|
||||
|
||||
if (rhs.sk_update)
|
||||
u->update(*rhs.sk_update);
|
||||
else if (rhs.sk_union)
|
||||
u->update(rhs.sk_union->get_result());
|
||||
}
|
||||
|
||||
/// You can only call for an empty object.
|
||||
void read(DB::ReadBuffer & in)
|
||||
{
|
||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||
readVectorBinary(bytes, in);
|
||||
if (!bytes.empty())
|
||||
{
|
||||
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
||||
getSkUnion()->update(sk);
|
||||
}
|
||||
}
|
||||
|
||||
void write(DB::WriteBuffer & out) const
|
||||
{
|
||||
if (sk_update)
|
||||
{
|
||||
auto bytes = sk_update->compact().serialize();
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
else if (sk_union)
|
||||
{
|
||||
auto bytes = sk_union->get_result().serialize();
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
else
|
||||
{
|
||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -50,6 +50,7 @@ SRCS(
|
||||
AggregateFunctionStatisticsSimple.cpp
|
||||
AggregateFunctionStudentTTest.cpp
|
||||
AggregateFunctionSum.cpp
|
||||
AggregateFunctionSumCount.cpp
|
||||
AggregateFunctionSumMap.cpp
|
||||
AggregateFunctionTopK.cpp
|
||||
AggregateFunctionUniq.cpp
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
}
|
||||
|
||||
/// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -128,7 +128,7 @@ public:
|
||||
offsets.push_back(new_size);
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -277,7 +277,7 @@ private:
|
||||
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
|
||||
* In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
|
||||
*/
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
|
||||
#endif
|
||||
@ -359,6 +359,6 @@ extern template class Allocator<true, false>;
|
||||
extern template class Allocator<false, true>;
|
||||
extern template class Allocator<true, true>;
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -19,7 +19,7 @@ namespace DB
|
||||
struct UInt128
|
||||
{
|
||||
/// Suppress gcc7 warnings: 'prev_key.DB::UInt128::low' may be used uninitialized in this function
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -92,7 +92,7 @@ struct UInt128
|
||||
return static_cast<T>(low);
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -150,7 +150,7 @@ struct DummyUInt256
|
||||
{
|
||||
|
||||
/// Suppress gcc7 warnings: 'prev_key.DB::UInt256::a' may be used uninitialized in this function
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -179,7 +179,7 @@ struct DummyUInt256
|
||||
bool operator== (const UInt64 rhs) const { return a == rhs && b == 0 && c == 0 && d == 0; }
|
||||
bool operator!= (const UInt64 rhs) const { return !operator==(rhs); }
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -15,4 +15,3 @@
|
||||
#cmakedefine01 USE_GRPC
|
||||
#cmakedefine01 USE_STATS
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
#cmakedefine01 USE_DATASKETCHES
|
||||
|
@ -1,5 +1,5 @@
|
||||
/// Bug in GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
@ -263,6 +263,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -69,7 +69,7 @@ static void aggregate1(Map & map, Source::const_iterator begin, Source::const_it
|
||||
++map[*it];
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -122,7 +122,7 @@ static void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source:
|
||||
}
|
||||
}
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -62,7 +62,7 @@ struct AggregateIndependent
|
||||
}
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -115,7 +115,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
|
||||
}
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -265,7 +265,7 @@ struct Creator
|
||||
void operator()(Value &) const {}
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -275,7 +275,7 @@ struct Updater
|
||||
void operator()(Value & x) const { ++x; }
|
||||
};
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -96,7 +96,7 @@ template <typename T> bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale
|
||||
template <typename T> bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale);
|
||||
template <typename T> bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale);
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -159,7 +159,7 @@ private:
|
||||
T dec;
|
||||
UInt32 scale;
|
||||
};
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -563,7 +563,7 @@ public:
|
||||
{
|
||||
case Types::Null: return f(field.template get<Null>());
|
||||
// gcc 8.2.1
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
@ -583,7 +583,7 @@ public:
|
||||
case Types::Int128: return f(field.template get<Int128>());
|
||||
case Types::UInt256: return f(field.template get<UInt256>());
|
||||
case Types::Int256: return f(field.template get<Int256>());
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
@ -70,6 +70,7 @@ class IColumn;
|
||||
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
|
||||
M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
|
||||
M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
|
||||
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
|
||||
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
|
||||
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
|
||||
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
|
||||
@ -446,6 +447,8 @@ class IColumn;
|
||||
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
|
||||
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
|
||||
M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
|
||||
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
\
|
||||
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
||||
\
|
||||
|
@ -15,7 +15,7 @@ namespace DB
|
||||
struct Null {};
|
||||
|
||||
/// Ignore strange gcc warning https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55776
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#endif
|
||||
@ -59,7 +59,7 @@ enum class TypeIndex
|
||||
LowCardinality,
|
||||
Map,
|
||||
};
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -198,7 +198,7 @@ ASTPtr DatabaseConnectionMySQL::getCreateDatabaseQuery() const
|
||||
|
||||
void DatabaseConnectionMySQL::fetchTablesIntoLocalCache(ContextPtr local_context) const
|
||||
{
|
||||
const auto & tables_with_modification_time = fetchTablesWithModificationTime();
|
||||
const auto & tables_with_modification_time = fetchTablesWithModificationTime(local_context);
|
||||
|
||||
destroyLocalCacheExtraTables(tables_with_modification_time);
|
||||
fetchLatestTablesStructureIntoCache(tables_with_modification_time, local_context);
|
||||
@ -252,7 +252,7 @@ void DatabaseConnectionMySQL::fetchLatestTablesStructureIntoCache(
|
||||
}
|
||||
}
|
||||
|
||||
std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTime() const
|
||||
std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTime(ContextPtr local_context) const
|
||||
{
|
||||
Block tables_status_sample_block
|
||||
{
|
||||
@ -268,7 +268,8 @@ std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTim
|
||||
" WHERE TABLE_SCHEMA = " << quote << database_name_in_mysql;
|
||||
|
||||
std::map<String, UInt64> tables_with_modification_time;
|
||||
MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_status_sample_block, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(local_context->getSettingsRef());
|
||||
MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_status_sample_block, mysql_input_stream_settings);
|
||||
|
||||
while (Block block = result.read())
|
||||
{
|
||||
@ -292,7 +293,7 @@ DatabaseConnectionMySQL::fetchTablesColumnsList(const std::vector<String> & tabl
|
||||
mysql_pool,
|
||||
database_name_in_mysql,
|
||||
tables_name,
|
||||
settings.external_table_functions_use_nulls,
|
||||
settings,
|
||||
database_settings->mysql_datatypes_support_level);
|
||||
}
|
||||
|
||||
|
@ -108,7 +108,7 @@ private:
|
||||
|
||||
void fetchTablesIntoLocalCache(ContextPtr context) const;
|
||||
|
||||
std::map<String, UInt64> fetchTablesWithModificationTime() const;
|
||||
std::map<String, UInt64> fetchTablesWithModificationTime(ContextPtr local_context) const;
|
||||
|
||||
std::map<String, NamesAndTypesList> fetchTablesColumnsList(const std::vector<String> & tables_name, ContextPtr context) const;
|
||||
|
||||
|
@ -44,7 +44,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
mysqlxx::PoolWithFailover & pool,
|
||||
const String & database_name,
|
||||
const std::vector<String> & tables_name,
|
||||
bool external_table_functions_use_nulls,
|
||||
const Settings & settings,
|
||||
MultiEnum<MySQLDataTypesSupport> type_support)
|
||||
{
|
||||
std::map<String, NamesAndTypesList> tables_and_columns;
|
||||
@ -78,7 +78,8 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
" WHERE TABLE_SCHEMA = " << quote << database_name
|
||||
<< " AND TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION";
|
||||
|
||||
MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(settings);
|
||||
MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, mysql_input_stream_settings);
|
||||
while (Block block = result.read())
|
||||
{
|
||||
const auto & table_name_col = *block.getByPosition(0).column;
|
||||
@ -99,7 +100,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
convertMySQLDataType(
|
||||
type_support,
|
||||
column_type_col[i].safeGet<String>(),
|
||||
external_table_functions_use_nulls && is_nullable_col[i].safeGet<UInt64>(),
|
||||
settings.external_table_functions_use_nulls && is_nullable_col[i].safeGet<UInt64>(),
|
||||
is_unsigned_col[i].safeGet<UInt64>(),
|
||||
char_max_length_col[i].safeGet<UInt64>(),
|
||||
precision_col[i].safeGet<UInt64>(),
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -20,7 +21,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
|
||||
mysqlxx::PoolWithFailover & pool,
|
||||
const String & database_name,
|
||||
const std::vector<String> & tables_name,
|
||||
bool external_table_functions_use_nulls,
|
||||
const Settings & settings,
|
||||
MultiEnum<MySQLDataTypesSupport> type_support);
|
||||
|
||||
}
|
||||
|
@ -24,7 +24,8 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name, const std::vector<String> & fetch_tables)
|
||||
const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name,
|
||||
const std::vector<String> & fetch_tables, const Settings & global_settings)
|
||||
{
|
||||
std::unordered_map<String, String> tables_create_query;
|
||||
for (const auto & fetch_table_name : fetch_tables)
|
||||
@ -34,9 +35,10 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
{std::make_shared<DataTypeString>(), "Create Table"},
|
||||
};
|
||||
|
||||
StreamSettings mysql_input_stream_settings(global_settings, false, true);
|
||||
MySQLBlockInputStream show_create_table(
|
||||
connection, "SHOW CREATE TABLE " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(fetch_table_name),
|
||||
show_create_table_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
show_create_table_header, mysql_input_stream_settings);
|
||||
|
||||
Block create_query_block = show_create_table.read();
|
||||
if (!create_query_block || create_query_block.rows() != 1)
|
||||
@ -49,13 +51,14 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
}
|
||||
|
||||
|
||||
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database)
|
||||
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database, const Settings & global_settings)
|
||||
{
|
||||
Block header{{std::make_shared<DataTypeString>(), "table_name"}};
|
||||
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE != 'VIEW' AND TABLE_SCHEMA = " + quoteString(database);
|
||||
|
||||
std::vector<String> tables_in_db;
|
||||
MySQLBlockInputStream input(connection, query, header, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(global_settings);
|
||||
MySQLBlockInputStream input(connection, query, header, mysql_input_stream_settings);
|
||||
|
||||
while (Block block = input.read())
|
||||
{
|
||||
@ -77,7 +80,8 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c
|
||||
{std::make_shared<DataTypeString>(), "Executed_Gtid_Set"},
|
||||
};
|
||||
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER STATUS;", header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER STATUS;", header, mysql_input_stream_settings);
|
||||
Block master_status = input.read();
|
||||
|
||||
if (!master_status || master_status.rows() != 1)
|
||||
@ -99,7 +103,8 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
|
||||
};
|
||||
|
||||
const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'";
|
||||
MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, mysql_input_stream_settings);
|
||||
|
||||
while (Block variables_block = variables_input.read())
|
||||
{
|
||||
@ -114,7 +119,7 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
|
||||
}
|
||||
}
|
||||
|
||||
static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & connection, WriteBuffer & out)
|
||||
static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & connection, const Settings & global_settings, WriteBuffer & out)
|
||||
{
|
||||
Block sync_user_privs_header
|
||||
{
|
||||
@ -122,7 +127,8 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
|
||||
};
|
||||
|
||||
String grants_query, sub_privs;
|
||||
MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, DEFAULT_BLOCK_SIZE);
|
||||
StreamSettings mysql_input_stream_settings(global_settings);
|
||||
MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, mysql_input_stream_settings);
|
||||
while (Block block = input.read())
|
||||
{
|
||||
for (size_t index = 0; index < block.rows(); ++index)
|
||||
@ -146,11 +152,11 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
|
||||
return false;
|
||||
}
|
||||
|
||||
static void checkSyncUserPriv(const mysqlxx::PoolWithFailover::Entry & connection)
|
||||
static void checkSyncUserPriv(const mysqlxx::PoolWithFailover::Entry & connection, const Settings & global_settings)
|
||||
{
|
||||
WriteBufferFromOwnString out;
|
||||
|
||||
if (!checkSyncUserPrivImpl(connection, out))
|
||||
if (!checkSyncUserPrivImpl(connection, global_settings, out))
|
||||
throw Exception("MySQL SYNC USER ACCESS ERR: mysql sync user needs "
|
||||
"at least GLOBAL PRIVILEGES:'RELOAD, REPLICATION SLAVE, REPLICATION CLIENT' "
|
||||
"and SELECT PRIVILEGE on MySQL Database."
|
||||
@ -167,7 +173,8 @@ bool MaterializeMetadata::checkBinlogFileExists(const mysqlxx::PoolWithFailover:
|
||||
{std::make_shared<DataTypeUInt64>(), "File_size"}
|
||||
};
|
||||
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", logs_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", logs_header, mysql_input_stream_settings);
|
||||
|
||||
while (Block block = input.read())
|
||||
{
|
||||
@ -222,7 +229,7 @@ void MaterializeMetadata::transaction(const MySQLReplication::Position & positio
|
||||
commitMetadata(std::move(fun), persistent_tmp_path, persistent_path);
|
||||
}
|
||||
|
||||
MaterializeMetadata::MaterializeMetadata(const String & path_) : persistent_path(path_)
|
||||
MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & settings_) : persistent_path(path_), settings(settings_)
|
||||
{
|
||||
if (Poco::File(persistent_path).exists())
|
||||
{
|
||||
@ -244,7 +251,7 @@ void MaterializeMetadata::startReplication(
|
||||
mysqlxx::PoolWithFailover::Entry & connection, const String & database,
|
||||
bool & opened_transaction, std::unordered_map<String, String> & need_dumping_tables)
|
||||
{
|
||||
checkSyncUserPriv(connection);
|
||||
checkSyncUserPriv(connection, settings);
|
||||
|
||||
if (checkBinlogFileExists(connection))
|
||||
return;
|
||||
@ -263,7 +270,7 @@ void MaterializeMetadata::startReplication(
|
||||
connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute();
|
||||
|
||||
opened_transaction = true;
|
||||
need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database));
|
||||
need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), settings);
|
||||
connection->query("UNLOCK TABLES;").execute();
|
||||
}
|
||||
catch (...)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Core/MySQL/MySQLReplication.h>
|
||||
#include <mysqlxx/Connection.h>
|
||||
#include <mysqlxx/PoolWithFailover.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,6 +26,7 @@ namespace DB
|
||||
struct MaterializeMetadata
|
||||
{
|
||||
const String persistent_path;
|
||||
const Settings settings;
|
||||
|
||||
String binlog_file;
|
||||
UInt64 binlog_position;
|
||||
@ -50,7 +52,7 @@ struct MaterializeMetadata
|
||||
bool & opened_transaction,
|
||||
std::unordered_map<String, String> & need_dumping_tables);
|
||||
|
||||
MaterializeMetadata(const String & path_);
|
||||
MaterializeMetadata(const String & path_, const Settings & settings_);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ MaterializeMySQLSyncThread::~MaterializeMySQLSyncThread()
|
||||
}
|
||||
}
|
||||
|
||||
static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
|
||||
static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const Settings & settings)
|
||||
{
|
||||
Block variables_header{
|
||||
{std::make_shared<DataTypeString>(), "Variable_name"},
|
||||
@ -104,19 +104,19 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
|
||||
"OR (Variable_name = 'default_authentication_plugin' AND upper(Value) = 'MYSQL_NATIVE_PASSWORD') "
|
||||
"OR (Variable_name = 'log_bin_use_v1_row_events' AND upper(Value) = 'OFF');";
|
||||
|
||||
MySQLBlockInputStream variables_input(connection, check_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);
|
||||
StreamSettings mysql_input_stream_settings(settings, false, true);
|
||||
MySQLBlockInputStream variables_input(connection, check_query, variables_header, mysql_input_stream_settings);
|
||||
|
||||
Block variables_block = variables_input.read();
|
||||
if (!variables_block || variables_block.rows() != 5)
|
||||
std::unordered_map<String, String> variables_error_message{
|
||||
{"log_bin", "log_bin = 'ON'"},
|
||||
{"binlog_format", "binlog_format='ROW'"},
|
||||
{"binlog_row_image", "binlog_row_image='FULL'"},
|
||||
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
|
||||
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
|
||||
};
|
||||
|
||||
while (Block variables_block = variables_input.read())
|
||||
{
|
||||
std::unordered_map<String, String> variables_error_message{
|
||||
{"log_bin", "log_bin = 'ON'"},
|
||||
{"binlog_format", "binlog_format='ROW'"},
|
||||
{"binlog_row_image", "binlog_row_image='FULL'"},
|
||||
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
|
||||
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
|
||||
};
|
||||
|
||||
ColumnPtr variable_name_column = variables_block.getByName("Variable_name").column;
|
||||
|
||||
for (size_t index = 0; index < variables_block.rows(); ++index)
|
||||
@ -126,7 +126,10 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
|
||||
if (error_message_it != variables_error_message.end())
|
||||
variables_error_message.erase(error_message_it);
|
||||
}
|
||||
}
|
||||
|
||||
if (!variables_error_message.empty())
|
||||
{
|
||||
bool first = true;
|
||||
WriteBufferFromOwnString error_message;
|
||||
error_message << "Illegal MySQL variables, the MaterializeMySQL engine requires ";
|
||||
@ -167,7 +170,7 @@ void MaterializeMySQLSyncThread::synchronization()
|
||||
try
|
||||
{
|
||||
MaterializeMetadata metadata(
|
||||
DatabaseCatalog::instance().getDatabase(database_name)->getMetadataPath() + "/.metadata");
|
||||
DatabaseCatalog::instance().getDatabase(database_name)->getMetadataPath() + "/.metadata", getContext()->getSettingsRef());
|
||||
bool need_reconnect = true;
|
||||
|
||||
Stopwatch watch;
|
||||
@ -240,7 +243,7 @@ void MaterializeMySQLSyncThread::assertMySQLAvailable()
|
||||
{
|
||||
try
|
||||
{
|
||||
checkMySQLVariables(pool.get());
|
||||
checkMySQLVariables(pool.get(), getContext()->getSettingsRef());
|
||||
}
|
||||
catch (const mysqlxx::ConnectionFailed & e)
|
||||
{
|
||||
@ -326,9 +329,10 @@ static inline void dumpDataForTables(
|
||||
tryToExecuteQuery(query_prefix + " " + iterator->second, query_context, database_name, comment); /// create table.
|
||||
|
||||
auto out = std::make_shared<CountingBlockOutputStream>(getTableOutput(database_name, table_name, query_context));
|
||||
StreamSettings mysql_input_stream_settings(context->getSettingsRef());
|
||||
MySQLBlockInputStream input(
|
||||
connection, "SELECT * FROM " + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name),
|
||||
out->getHeader(), DEFAULT_BLOCK_SIZE);
|
||||
out->getHeader(), mysql_input_stream_settings);
|
||||
|
||||
Stopwatch watch;
|
||||
copyData(input, *out, is_cancelled);
|
||||
@ -375,7 +379,7 @@ bool MaterializeMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metad
|
||||
|
||||
opened_transaction = false;
|
||||
|
||||
checkMySQLVariables(connection);
|
||||
checkMySQLVariables(connection, getContext()->getSettingsRef());
|
||||
std::unordered_map<String, String> need_dumping_tables;
|
||||
metadata.startReplication(connection, mysql_database_name, opened_transaction, need_dumping_tables);
|
||||
|
||||
|
@ -4,9 +4,15 @@
|
||||
#include "DictionarySourceFactory.h"
|
||||
#include "DictionaryStructure.h"
|
||||
#include "registerDictionaries.h"
|
||||
#include <Core/Settings.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
[[maybe_unused]]
|
||||
static const size_t default_num_tries_on_connection_loss = 3;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
@ -14,20 +20,20 @@ namespace ErrorCodes
|
||||
|
||||
void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
{
|
||||
auto create_table_source = [=](const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
Block & sample_block,
|
||||
ContextPtr /* context */,
|
||||
auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct,
|
||||
[[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
|
||||
[[maybe_unused]] const std::string & config_prefix,
|
||||
[[maybe_unused]] Block & sample_block,
|
||||
[[maybe_unused]] ContextPtr context,
|
||||
const std::string & /* default_database */,
|
||||
bool /* check_config */) -> DictionarySourcePtr {
|
||||
#if USE_MYSQL
|
||||
return std::make_unique<MySQLDictionarySource>(dict_struct, config, config_prefix + ".mysql", sample_block);
|
||||
StreamSettings mysql_input_stream_settings(context->getSettingsRef()
|
||||
, config.getBool(config_prefix + ".mysql.close_connection", false) || config.getBool(config_prefix + ".mysql.share_connection", false)
|
||||
, false
|
||||
, config.getBool(config_prefix + ".mysql.fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss);
|
||||
return std::make_unique<MySQLDictionarySource>(dict_struct, config, config_prefix + ".mysql", sample_block, mysql_input_stream_settings);
|
||||
#else
|
||||
(void)dict_struct;
|
||||
(void)config;
|
||||
(void)config_prefix;
|
||||
(void)sample_block;
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support.");
|
||||
#endif
|
||||
@ -45,22 +51,21 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <common/LocalDateTime.h>
|
||||
# include <common/logger_useful.h>
|
||||
# include <Formats/MySQLBlockInputStream.h>
|
||||
# include "readInvalidateQuery.h"
|
||||
# include <mysqlxx/Exception.h>
|
||||
# include <mysqlxx/PoolFactory.h>
|
||||
# include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
static const UInt64 max_block_size = 8192;
|
||||
static const size_t default_num_tries_on_connection_loss = 3;
|
||||
|
||||
|
||||
MySQLDictionarySource::MySQLDictionarySource(
|
||||
const DictionaryStructure & dict_struct_,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
const Block & sample_block_)
|
||||
const Block & sample_block_,
|
||||
const StreamSettings & settings_)
|
||||
: log(&Poco::Logger::get("MySQLDictionarySource"))
|
||||
, update_time{std::chrono::system_clock::from_time_t(0)}
|
||||
, dict_struct{dict_struct_}
|
||||
@ -74,10 +79,7 @@ MySQLDictionarySource::MySQLDictionarySource(
|
||||
, query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks}
|
||||
, load_all_query{query_builder.composeLoadAllQuery()}
|
||||
, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
|
||||
, close_connection(
|
||||
config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false))
|
||||
, max_tries_for_mysql_block_input_stream(
|
||||
config.getBool(config_prefix + ".fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss)
|
||||
, settings(settings_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -98,8 +100,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other
|
||||
, last_modification{other.last_modification}
|
||||
, invalidate_query{other.invalidate_query}
|
||||
, invalidate_query_response{other.invalidate_query_response}
|
||||
, close_connection{other.close_connection}
|
||||
, max_tries_for_mysql_block_input_stream{other.max_tries_for_mysql_block_input_stream}
|
||||
, settings(other.settings)
|
||||
{
|
||||
}
|
||||
|
||||
@ -122,7 +123,7 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate()
|
||||
BlockInputStreamPtr MySQLDictionarySource::loadFromQuery(const String & query)
|
||||
{
|
||||
return std::make_shared<MySQLWithFailoverBlockInputStream>(
|
||||
pool, query, sample_block, max_block_size, close_connection, false, max_tries_for_mysql_block_input_stream);
|
||||
pool, query, sample_block, settings);
|
||||
}
|
||||
|
||||
BlockInputStreamPtr MySQLDictionarySource::loadAll()
|
||||
@ -245,7 +246,7 @@ LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry &
|
||||
++fetched_rows;
|
||||
}
|
||||
|
||||
if (close_connection && allow_connection_closure)
|
||||
if (settings.auto_close && allow_connection_closure)
|
||||
{
|
||||
connection.disconnect();
|
||||
}
|
||||
@ -269,7 +270,7 @@ std::string MySQLDictionarySource::doInvalidateQuery(const std::string & request
|
||||
Block invalidate_sample_block;
|
||||
ColumnPtr column(ColumnString::create());
|
||||
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
|
||||
MySQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, 1, close_connection);
|
||||
MySQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, settings);
|
||||
return readInvalidateQuery(block_input_stream);
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
# include "DictionaryStructure.h"
|
||||
# include "ExternalQueryBuilder.h"
|
||||
# include "IDictionarySource.h"
|
||||
|
||||
# include <Formats/MySQLBlockInputStream.h>
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
@ -35,7 +35,8 @@ public:
|
||||
const DictionaryStructure & dict_struct_,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
const Block & sample_block_);
|
||||
const Block & sample_block_,
|
||||
const StreamSettings & settings_);
|
||||
|
||||
/// copy-constructor is provided in order to support cloneability
|
||||
MySQLDictionarySource(const MySQLDictionarySource & other);
|
||||
@ -87,8 +88,7 @@ private:
|
||||
LocalDateTime last_modification;
|
||||
std::string invalidate_query;
|
||||
mutable std::string invalidate_query_response;
|
||||
const bool close_connection;
|
||||
const size_t max_tries_for_mysql_block_input_stream;
|
||||
const StreamSettings settings;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -250,8 +250,12 @@ class ReadIndirectBufferFromS3 final : public ReadBufferFromFileBase
|
||||
{
|
||||
public:
|
||||
ReadIndirectBufferFromS3(
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, DiskS3::Metadata metadata_, size_t buf_size_)
|
||||
: client_ptr(std::move(client_ptr_)), bucket(bucket_), metadata(std::move(metadata_)), buf_size(buf_size_)
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, DiskS3::Metadata metadata_, UInt64 s3_max_single_read_retries_, size_t buf_size_)
|
||||
: client_ptr(std::move(client_ptr_))
|
||||
, bucket(bucket_)
|
||||
, metadata(std::move(metadata_))
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, buf_size(buf_size_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -307,7 +311,7 @@ private:
|
||||
const auto & [path, size] = metadata.s3_objects[i];
|
||||
if (size > offset)
|
||||
{
|
||||
auto buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, buf_size);
|
||||
auto buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, s3_max_single_read_retries, buf_size);
|
||||
buf->seek(offset, SEEK_SET);
|
||||
return buf;
|
||||
}
|
||||
@ -336,7 +340,7 @@ private:
|
||||
|
||||
++current_buf_idx;
|
||||
const auto & path = metadata.s3_objects[current_buf_idx].first;
|
||||
current_buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, buf_size);
|
||||
current_buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, metadata.s3_root_path + path, s3_max_single_read_retries, buf_size);
|
||||
current_buf->next();
|
||||
working_buffer = current_buf->buffer();
|
||||
absolute_position += working_buffer.size();
|
||||
@ -347,6 +351,7 @@ private:
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
||||
const String & bucket;
|
||||
DiskS3::Metadata metadata;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
size_t buf_size;
|
||||
|
||||
size_t absolute_position = 0;
|
||||
@ -560,6 +565,7 @@ DiskS3::DiskS3(
|
||||
String bucket_,
|
||||
String s3_root_path_,
|
||||
String metadata_path_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
size_t min_upload_part_size_,
|
||||
size_t max_single_part_upload_size_,
|
||||
size_t min_bytes_for_seek_,
|
||||
@ -573,6 +579,7 @@ DiskS3::DiskS3(
|
||||
, bucket(std::move(bucket_))
|
||||
, s3_root_path(std::move(s3_root_path_))
|
||||
, metadata_path(std::move(metadata_path_))
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, min_upload_part_size(min_upload_part_size_)
|
||||
, max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, min_bytes_for_seek(min_bytes_for_seek_)
|
||||
@ -679,7 +686,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, si
|
||||
LOG_DEBUG(log, "Read from file by path: {}. Existing S3 objects: {}",
|
||||
backQuote(metadata_path + path), metadata.s3_objects.size());
|
||||
|
||||
auto reader = std::make_unique<ReadIndirectBufferFromS3>(client, bucket, metadata, buf_size);
|
||||
auto reader = std::make_unique<ReadIndirectBufferFromS3>(client, bucket, metadata, s3_max_single_read_retries, buf_size);
|
||||
return std::make_unique<SeekAvoidingReadBuffer>(std::move(reader), min_bytes_for_seek);
|
||||
}
|
||||
|
||||
@ -979,7 +986,7 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
|
||||
if (!checkObjectExists(source_bucket, source_path + SCHEMA_VERSION_OBJECT))
|
||||
return version;
|
||||
|
||||
ReadBufferFromS3 buffer (client, source_bucket, source_path + SCHEMA_VERSION_OBJECT);
|
||||
ReadBufferFromS3 buffer(client, source_bucket, source_path + SCHEMA_VERSION_OBJECT, s3_max_single_read_retries);
|
||||
readIntText(version, buffer);
|
||||
|
||||
return version;
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
String bucket_,
|
||||
String s3_root_path_,
|
||||
String metadata_path_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
size_t min_upload_part_size_,
|
||||
size_t max_single_part_upload_size_,
|
||||
size_t min_bytes_for_seek_,
|
||||
@ -185,6 +186,7 @@ private:
|
||||
const String bucket;
|
||||
const String s3_root_path;
|
||||
String metadata_path;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
size_t min_upload_part_size;
|
||||
size_t max_single_part_upload_size;
|
||||
size_t min_bytes_for_seek;
|
||||
|
@ -161,6 +161,7 @@ void registerDiskS3(DiskFactory & factory)
|
||||
uri.bucket,
|
||||
uri.key,
|
||||
metadata_path,
|
||||
context->getSettingsRef().s3_max_single_read_retries,
|
||||
context->getSettingsRef().s3_min_upload_part_size,
|
||||
context->getSettingsRef().s3_max_single_part_upload_size,
|
||||
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "gtest_disk.h"
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#endif
|
||||
|
@ -30,6 +30,15 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Builds the MySQL stream options from the given Settings plus per-stream flags.
/// auto_close_ and fetch_by_name_ are stored as-is; max_retry_ becomes
/// default_num_tries_on_connection_loss.
StreamSettings::StreamSettings(const Settings & settings, bool auto_close_, bool fetch_by_name_, size_t max_retry_)
|
||||
/// Row limit: external_storage_max_read_rows when it is non-zero, otherwise max_block_size.
: max_read_mysql_row_nums((settings.external_storage_max_read_rows) ? settings.external_storage_max_read_rows : settings.max_block_size)
|
||||
/// Byte limit is copied unchanged from external_storage_max_read_bytes.
, max_read_mysql_bytes_size(settings.external_storage_max_read_bytes)
|
||||
, auto_close(auto_close_)
|
||||
, fetch_by_name(fetch_by_name_)
|
||||
, default_num_tries_on_connection_loss(max_retry_)
|
||||
{
|
||||
}
|
||||
|
||||
MySQLBlockInputStream::Connection::Connection(
|
||||
const mysqlxx::PoolWithFailover::Entry & entry_,
|
||||
const std::string & query_str)
|
||||
@ -44,29 +53,19 @@ MySQLBlockInputStream::MySQLBlockInputStream(
|
||||
const mysqlxx::PoolWithFailover::Entry & entry,
|
||||
const std::string & query_str,
|
||||
const Block & sample_block,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_,
|
||||
const bool fetch_by_name_)
|
||||
const StreamSettings & settings_)
|
||||
: log(&Poco::Logger::get("MySQLBlockInputStream"))
|
||||
, connection{std::make_unique<Connection>(entry, query_str)}
|
||||
, max_block_size{max_block_size_}
|
||||
, auto_close{auto_close_}
|
||||
, fetch_by_name(fetch_by_name_)
|
||||
, settings{std::make_unique<StreamSettings>(settings_)}
|
||||
{
|
||||
description.init(sample_block);
|
||||
initPositionMappingFromQueryResultStructure();
|
||||
}
|
||||
|
||||
/// For descendant MySQLWithFailoverBlockInputStream
|
||||
MySQLBlockInputStream::MySQLBlockInputStream(
|
||||
const Block & sample_block_,
|
||||
UInt64 max_block_size_,
|
||||
bool auto_close_,
|
||||
bool fetch_by_name_)
|
||||
MySQLBlockInputStream::MySQLBlockInputStream(const Block &sample_block_, const StreamSettings & settings_)
|
||||
: log(&Poco::Logger::get("MySQLBlockInputStream"))
|
||||
, max_block_size(max_block_size_)
|
||||
, auto_close(auto_close_)
|
||||
, fetch_by_name(fetch_by_name_)
|
||||
, settings(std::make_unique<StreamSettings>(settings_))
|
||||
{
|
||||
description.init(sample_block_);
|
||||
}
|
||||
@ -76,14 +75,10 @@ MySQLWithFailoverBlockInputStream::MySQLWithFailoverBlockInputStream(
|
||||
mysqlxx::PoolWithFailoverPtr pool_,
|
||||
const std::string & query_str_,
|
||||
const Block & sample_block_,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_,
|
||||
const bool fetch_by_name_,
|
||||
const size_t max_tries_)
|
||||
: MySQLBlockInputStream(sample_block_, max_block_size_, auto_close_, fetch_by_name_)
|
||||
, pool(pool_)
|
||||
, query_str(query_str_)
|
||||
, max_tries(max_tries_)
|
||||
const StreamSettings & settings_)
|
||||
: MySQLBlockInputStream(sample_block_, settings_)
|
||||
, pool(pool_)
|
||||
, query_str(query_str_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -101,12 +96,12 @@ void MySQLWithFailoverBlockInputStream::readPrefix()
|
||||
}
|
||||
catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST
|
||||
{
|
||||
LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, max_tries, ecl.displayText());
|
||||
LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, settings->default_num_tries_on_connection_loss, ecl.displayText());
|
||||
}
|
||||
|
||||
if (++count_connect_attempts > max_tries)
|
||||
if (++count_connect_attempts > settings->default_num_tries_on_connection_loss)
|
||||
{
|
||||
LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, max_tries);
|
||||
LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -118,45 +113,57 @@ namespace
|
||||
{
|
||||
using ValueType = ExternalResultDescription::ValueType;
|
||||
|
||||
void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value)
|
||||
void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value, size_t & read_bytes_size)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ValueType::vtUInt8:
|
||||
assert_cast<ColumnUInt8 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 1;
|
||||
break;
|
||||
case ValueType::vtUInt16:
|
||||
assert_cast<ColumnUInt16 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 2;
|
||||
break;
|
||||
case ValueType::vtUInt32:
|
||||
assert_cast<ColumnUInt32 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
case ValueType::vtUInt64:
|
||||
assert_cast<ColumnUInt64 &>(column).insertValue(value.getUInt());
|
||||
read_bytes_size += 8;
|
||||
break;
|
||||
case ValueType::vtInt8:
|
||||
assert_cast<ColumnInt8 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 1;
|
||||
break;
|
||||
case ValueType::vtInt16:
|
||||
assert_cast<ColumnInt16 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 2;
|
||||
break;
|
||||
case ValueType::vtInt32:
|
||||
assert_cast<ColumnInt32 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
case ValueType::vtInt64:
|
||||
assert_cast<ColumnInt64 &>(column).insertValue(value.getInt());
|
||||
read_bytes_size += 8;
|
||||
break;
|
||||
case ValueType::vtFloat32:
|
||||
assert_cast<ColumnFloat32 &>(column).insertValue(value.getDouble());
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
case ValueType::vtFloat64:
|
||||
assert_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
|
||||
read_bytes_size += 8;
|
||||
break;
|
||||
case ValueType::vtString:
|
||||
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
|
||||
read_bytes_size += assert_cast<ColumnString &>(column).byteSize();
|
||||
break;
|
||||
case ValueType::vtDate:
|
||||
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16(value.getDate().getDayNum()));
|
||||
read_bytes_size += 2;
|
||||
break;
|
||||
case ValueType::vtDateTime:
|
||||
{
|
||||
@ -166,10 +173,12 @@ namespace
|
||||
if (time < 0)
|
||||
time = 0;
|
||||
assert_cast<ColumnUInt32 &>(column).insertValue(time);
|
||||
read_bytes_size += 4;
|
||||
break;
|
||||
}
|
||||
case ValueType::vtUUID:
|
||||
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
|
||||
read_bytes_size += assert_cast<ColumnUInt128 &>(column).byteSize();
|
||||
break;
|
||||
case ValueType::vtDateTime64:[[fallthrough]];
|
||||
case ValueType::vtDecimal32: [[fallthrough]];
|
||||
@ -179,10 +188,12 @@ namespace
|
||||
{
|
||||
ReadBuffer buffer(const_cast<char *>(value.data()), value.size(), 0);
|
||||
data_type.getDefaultSerialization()->deserializeWholeText(column, buffer, FormatSettings{});
|
||||
read_bytes_size += column.sizeOfValueIfFixed();
|
||||
break;
|
||||
}
|
||||
case ValueType::vtFixedString:
|
||||
assert_cast<ColumnFixedString &>(column).insertData(value.data(), value.size());
|
||||
read_bytes_size += column.sizeOfValueIfFixed();
|
||||
break;
|
||||
default:
|
||||
throw Exception("Unsupported value type", ErrorCodes::NOT_IMPLEMENTED);
|
||||
@ -198,7 +209,7 @@ Block MySQLBlockInputStream::readImpl()
|
||||
auto row = connection->result.fetch();
|
||||
if (!row)
|
||||
{
|
||||
if (auto_close)
|
||||
if (settings->auto_close)
|
||||
connection->entry.disconnect();
|
||||
|
||||
return {};
|
||||
@ -209,6 +220,8 @@ Block MySQLBlockInputStream::readImpl()
|
||||
columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty();
|
||||
|
||||
size_t num_rows = 0;
|
||||
size_t read_bytes_size = 0;
|
||||
|
||||
while (row)
|
||||
{
|
||||
for (size_t index = 0; index < position_mapping.size(); ++index)
|
||||
@ -224,12 +237,12 @@ Block MySQLBlockInputStream::readImpl()
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
|
||||
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
|
||||
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
|
||||
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value, read_bytes_size);
|
||||
column_nullable.getNullMapData().emplace_back(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
insertValue(*sample.type, *columns[index], description.types[index].first, value);
|
||||
insertValue(*sample.type, *columns[index], description.types[index].first, value, read_bytes_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -245,7 +258,7 @@ Block MySQLBlockInputStream::readImpl()
|
||||
}
|
||||
|
||||
++num_rows;
|
||||
if (num_rows == max_block_size)
|
||||
if (num_rows == settings->max_read_mysql_row_nums || (settings->max_read_mysql_bytes_size && read_bytes_size >= settings->max_read_mysql_bytes_size))
|
||||
break;
|
||||
|
||||
row = connection->result.fetch();
|
||||
@ -257,7 +270,7 @@ void MySQLBlockInputStream::initPositionMappingFromQueryResultStructure()
|
||||
{
|
||||
position_mapping.resize(description.sample_block.columns());
|
||||
|
||||
if (!fetch_by_name)
|
||||
if (!settings->fetch_by_name)
|
||||
{
|
||||
if (description.sample_block.columns() != connection->result.getNumFields())
|
||||
throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while "
|
||||
|
@ -6,11 +6,24 @@
|
||||
#include <mysqlxx/PoolWithFailover.h>
|
||||
#include <mysqlxx/Query.h>
|
||||
#include <Core/ExternalResultDescription.h>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Options that control how a MySQL query result is read into blocks.
/// Passed to MySQLBlockInputStream / MySQLWithFailoverBlockInputStream instead of
/// separate block-size, auto-close, fetch-by-name and retry arguments.
struct StreamSettings
|
||||
{
|
||||
/// Maximum number of rows per block. Taken from `external_storage_max_read_rows`
/// if that setting is enabled (non-zero), otherwise from the common `max_block_size` setting.
size_t max_read_mysql_row_nums;
|
||||
/// Byte threshold per block, taken from `external_storage_max_read_bytes`.
/// Reading a block stops once the accumulated size reaches it; 0 disables the byte limit.
size_t max_read_mysql_bytes_size;
|
||||
/// If true, the connection is disconnected once the result has no more rows.
bool auto_close;
|
||||
/// If false, the result must have the same number of columns as the sample block.
/// NOTE(review): presumably true means columns are matched by name rather than position - confirm.
bool fetch_by_name;
|
||||
/// Maximum number of connection attempts made by MySQLWithFailoverBlockInputStream::readPrefix
/// before the connection error is rethrown.
size_t default_num_tries_on_connection_loss;
|
||||
|
||||
/// Only `settings` is required; defaults are: no auto-close, no fetch-by-name, 5 connection tries.
StreamSettings(const Settings & settings, bool auto_close_ = false, bool fetch_by_name_ = false, size_t max_retry_ = 5);
|
||||
|
||||
};
|
||||
|
||||
/// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining
|
||||
class MySQLBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
@ -19,16 +32,14 @@ public:
|
||||
const mysqlxx::PoolWithFailover::Entry & entry,
|
||||
const std::string & query_str,
|
||||
const Block & sample_block,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_ = false,
|
||||
const bool fetch_by_name_ = false);
|
||||
const StreamSettings & settings_);
|
||||
|
||||
String getName() const override { return "MySQL"; }
|
||||
|
||||
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
|
||||
|
||||
protected:
|
||||
MySQLBlockInputStream(const Block & sample_block_, UInt64 max_block_size_, bool auto_close_, bool fetch_by_name_);
|
||||
MySQLBlockInputStream(const Block & sample_block_, const StreamSettings & settings);
|
||||
Block readImpl() override;
|
||||
void initPositionMappingFromQueryResultStructure();
|
||||
|
||||
@ -44,9 +55,7 @@ protected:
|
||||
Poco::Logger * log;
|
||||
std::unique_ptr<Connection> connection;
|
||||
|
||||
const UInt64 max_block_size;
|
||||
const bool auto_close;
|
||||
const bool fetch_by_name;
|
||||
const std::unique_ptr<StreamSettings> settings;
|
||||
std::vector<size_t> position_mapping;
|
||||
ExternalResultDescription description;
|
||||
};
|
||||
@ -57,23 +66,18 @@ protected:
|
||||
class MySQLWithFailoverBlockInputStream final : public MySQLBlockInputStream
|
||||
{
|
||||
public:
|
||||
static constexpr inline auto MAX_TRIES_MYSQL_CONNECT = 5;
|
||||
|
||||
MySQLWithFailoverBlockInputStream(
|
||||
mysqlxx::PoolWithFailoverPtr pool_,
|
||||
const std::string & query_str_,
|
||||
const Block & sample_block_,
|
||||
const UInt64 max_block_size_,
|
||||
const bool auto_close_ = false,
|
||||
const bool fetch_by_name_ = false,
|
||||
const size_t max_tries_ = MAX_TRIES_MYSQL_CONNECT);
|
||||
const StreamSettings & settings_);
|
||||
|
||||
private:
|
||||
void readPrefix() override;
|
||||
|
||||
mysqlxx::PoolWithFailoverPtr pool;
|
||||
std::string query_str;
|
||||
size_t max_tries;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -140,7 +140,7 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
|
||||
|
||||
|
||||
/// The methods can be virtual or not depending on the template parameter. See IStringSource.
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#elif __clang_major__ >= 11
|
||||
@ -233,7 +233,7 @@ struct ConstSource : public Base
|
||||
}
|
||||
};
|
||||
|
||||
#if !__clang__ || __clang_major__ >= 11
|
||||
#if !defined(__clang__) || __clang_major__ >= 11
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
/// Warning in boost::geometry during template strategy substitution.
|
||||
#pragma GCC diagnostic push
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
|
||||
@ -285,7 +285,7 @@ void PointInPolygonWithGrid<CoordinateType>::calcGridAttributes(
|
||||
const Point & max_corner = box.max_corner();
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
|
||||
@ -322,7 +322,7 @@ void PointInPolygonWithGrid<CoordinateType>::buildGrid()
|
||||
for (size_t row = 0; row < grid_size; ++row)
|
||||
{
|
||||
#pragma GCC diagnostic push
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
CoordinateType y_min = min_corner.y() + row * cell_height;
|
||||
|
@ -45,6 +45,7 @@ void registerFunctionsUnixTimestamp64(FunctionFactory & factory);
|
||||
void registerFunctionBitHammingDistance(FunctionFactory & factory);
|
||||
void registerFunctionTupleHammingDistance(FunctionFactory & factory);
|
||||
void registerFunctionsStringHash(FunctionFactory & factory);
|
||||
void registerFunctionValidateNestedArraySizes(FunctionFactory & factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
void registerFunctionBayesAB(FunctionFactory &);
|
||||
#endif
|
||||
@ -103,6 +104,7 @@ void registerFunctions()
|
||||
registerFunctionBitHammingDistance(factory);
|
||||
registerFunctionTupleHammingDistance(factory);
|
||||
registerFunctionsStringHash(factory);
|
||||
registerFunctionValidateNestedArraySizes(factory);
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
registerFunctionBayesAB(factory);
|
||||
|
113
src/Functions/validateNestedArraySizes.cpp
Normal file
113
src/Functions/validateNestedArraySizes.cpp
Normal file
@ -0,0 +1,113 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns's offsets when Update
|
||||
* Arguments: num > 2
|
||||
* The first argument is the condition of WHERE in UPDATE operation, only when this is true, we need to check
|
||||
* The rest arguments are the subcolumns of Nested DataType.
|
||||
*/
|
||||
class FunctionValidateNestedArraySizes : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "validateNestedArraySizes";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionValidateNestedArraySizes>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override;
|
||||
};
|
||||
|
||||
/// Validates the arguments: there must be at least three of them, the first one (the condition) must be UInt8
/// and all the others must be Array. The result type is always UInt8.
DataTypePtr FunctionValidateNestedArraySizes::getReturnTypeImpl(const DataTypes & arguments) const
{
    const size_t num_args = arguments.size();

    if (num_args < 3)
        throw Exception(
            "Function " + getName() + " needs more than two arguments; passed " + toString(num_args) + ".",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    /// The message names the exact type that is checked (UInt8).
    if (!WhichDataType(arguments[0]).isUInt8())
        throw Exception(
            "Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be UInt8.",
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    for (size_t i = 1; i < num_args; ++i)
        if (!WhichDataType(arguments[i]).isArray())
            throw Exception(
                /// Argument positions are reported 1-based, consistently with "first argument" above.
                "Illegal type " + arguments[i]->getName() + " of argument " + toString(i + 1) + " of function " + getName()
                    + ". Must be Array.",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    return std::make_shared<DataTypeUInt8>();
}
|
||||
|
||||
/// For every row in which the condition (the first argument) is true, checks that all Array arguments
/// have the same number of elements in that row. Throws SIZES_OF_ARRAYS_DOESNT_MATCH on the first mismatch.
/// Returns a UInt8 column of `input_rows_count` rows filled with 1.
ColumnPtr FunctionValidateNestedArraySizes::executeImpl(
    const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const
{
    /// The condition can be a constant column while the arrays are not (in that case the default
    /// implementation for constants does not unwrap it), so materialize it before reading its data.
    /// The reference form of typeid_cast is used so that an unexpected column type is reported
    /// instead of producing a null pointer that would be dereferenced in the loop below.
    const ColumnPtr condition_holder = arguments[0].column->convertToFullColumnIfConst();
    const auto & condition = typeid_cast<const ColumnUInt8 &>(*condition_holder).getData();

    const size_t args_num = arguments.size();

    for (size_t i = 0; i < input_rows_count; ++i)
    {
        if (!condition[i])
            continue;

        /// The condition is true: the subcolumns of the Nested type must have the same array size in this row.
        size_t first_length = 0;
        size_t length = 0;
        for (size_t args_idx = 1; args_idx < args_num; ++args_idx)
        {
            const auto & current_arg = arguments[args_idx];
            const ColumnArray * current_column = nullptr;
            if (const auto * const_array = checkAndGetColumnConst<ColumnArray>(current_arg.column.get()))
            {
                /// A constant array: its size is taken from the first offset of the nested data column.
                current_column = checkAndGetColumn<ColumnArray>(&const_array->getDataColumn());
                length = current_column->getOffsets()[0];
            }
            else
            {
                current_column = checkAndGetColumn<ColumnArray>(current_arg.column.get());
                const auto & offsets = current_column->getOffsets();
                /// NOTE(review): for i == 0 this reads offsets[-1]; presumably the offsets array
                /// is left-padded so that this element reads as 0 - confirm.
                length = offsets[i] - offsets[i - 1];
            }

            if (args_idx == 1)
            {
                first_length = length;
            }
            else if (first_length != length)
            {
                throw Exception(
                    ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
                    "Elements '{}' and '{}' of Nested data structure (Array columns) "
                    "have different array sizes ({} and {} respectively) on row {}",
                    arguments[1].name, arguments[args_idx].name, first_length, length, i);
            }
        }
    }

    return ColumnUInt8::create(input_rows_count, 1);
}
|
||||
|
||||
/// Registers the validateNestedArraySizes function in the given function factory.
void registerFunctionValidateNestedArraySizes(FunctionFactory & factory)
{
    factory.registerFunction<FunctionValidateNestedArraySizes>();
}
|
||||
|
||||
}
|
@ -12,10 +12,12 @@
|
||||
|
||||
# include <utility>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event S3ReadMicroseconds;
|
||||
extern const Event S3ReadBytes;
|
||||
extern const Event S3ReadRequestsErrors;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -29,26 +31,58 @@ namespace ErrorCodes
|
||||
|
||||
|
||||
ReadBufferFromS3::ReadBufferFromS3(
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, const String & key_, size_t buffer_size_)
|
||||
: SeekableReadBuffer(nullptr, 0), client_ptr(std::move(client_ptr_)), bucket(bucket_), key(key_), buffer_size(buffer_size_)
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, const String & key_, UInt64 s3_max_single_read_retries_, size_t buffer_size_)
|
||||
: SeekableReadBuffer(nullptr, 0)
|
||||
, client_ptr(std::move(client_ptr_))
|
||||
, bucket(bucket_)
|
||||
, key(key_)
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, buffer_size(buffer_size_)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool ReadBufferFromS3::nextImpl()
|
||||
{
|
||||
if (!initialized)
|
||||
{
|
||||
/// Restoring valid value of `count()` during `nextImpl()`. See `ReadBuffer::next()`.
|
||||
pos = working_buffer.begin();
|
||||
|
||||
if (!impl)
|
||||
impl = initialize();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
Stopwatch watch;
|
||||
auto res = impl->next();
|
||||
bool next_result = false;
|
||||
|
||||
for (Int64 attempt = static_cast<Int64>(s3_max_single_read_retries); attempt >= 0; --attempt)
|
||||
{
|
||||
if (!impl)
|
||||
impl = initialize();
|
||||
|
||||
try
|
||||
{
|
||||
next_result = impl->next();
|
||||
/// FIXME. 1. Poco `istream` cannot read less than buffer_size or this state is being discarded during
|
||||
/// istream <-> iostream conversion. `gcount` always contains 0,
|
||||
/// that's why we always have error "Cannot read from istream at offset 0".
|
||||
|
||||
break;
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::S3ReadRequestsErrors, 1);
|
||||
|
||||
LOG_INFO(log, "Caught exception while reading S3 object. Bucket: {}, Key: {}, Offset: {}, Remaining attempts: {}, Message: {}",
|
||||
bucket, key, getPosition(), attempt, e.message());
|
||||
|
||||
impl.reset();
|
||||
|
||||
if (!attempt)
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
watch.stop();
|
||||
ProfileEvents::increment(ProfileEvents::S3ReadMicroseconds, watch.elapsedMicroseconds());
|
||||
|
||||
if (!res)
|
||||
if (!next_result)
|
||||
return false;
|
||||
internal_buffer = impl->buffer();
|
||||
|
||||
@ -60,7 +94,7 @@ bool ReadBufferFromS3::nextImpl()
|
||||
|
||||
off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
||||
{
|
||||
if (initialized)
|
||||
if (impl)
|
||||
throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
||||
|
||||
if (whence != SEEK_SET)
|
||||
@ -74,7 +108,6 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
||||
off_t ReadBufferFromS3::getPosition()
|
||||
{
|
||||
return offset + count();
|
||||
@ -82,13 +115,13 @@ off_t ReadBufferFromS3::getPosition()
|
||||
|
||||
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
|
||||
{
|
||||
LOG_TRACE(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, std::to_string(offset));
|
||||
LOG_TRACE(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, getPosition());
|
||||
|
||||
Aws::S3::Model::GetObjectRequest req;
|
||||
req.SetBucket(bucket);
|
||||
req.SetKey(key);
|
||||
if (offset != 0)
|
||||
req.SetRange("bytes=" + std::to_string(offset) + "-");
|
||||
if (getPosition())
|
||||
req.SetRange("bytes=" + std::to_string(getPosition()) + "-");
|
||||
|
||||
Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
|
||||
|
||||
|
@ -27,8 +27,8 @@ private:
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
||||
String bucket;
|
||||
String key;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
size_t buffer_size;
|
||||
bool initialized = false;
|
||||
off_t offset = 0;
|
||||
Aws::S3::Model::GetObjectResult read_result;
|
||||
std::unique_ptr<ReadBuffer> impl;
|
||||
@ -40,6 +40,7 @@ public:
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_,
|
||||
const String & bucket_,
|
||||
const String & key_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
|
||||
|
||||
bool nextImpl() override;
|
||||
|
@ -46,7 +46,7 @@ inline size_t readAlpha(char * res, size_t max_chars, ReadBuffer & in)
|
||||
}
|
||||
|
||||
#if defined(__PPC__)
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
#endif
|
||||
@ -634,7 +634,7 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf
|
||||
}
|
||||
|
||||
#if defined(__PPC__)
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Processors/QueryPlan/CreatingSetsStep.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -349,6 +350,35 @@ static void validateUpdateColumns(
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns ASTs of updated nested subcolumns, if all of subcolumns were updated.
|
||||
/// They are used to validate sizes of nested arrays.
|
||||
/// If some of subcolumns were updated and some weren't,
|
||||
/// it makes sense to validate only updated columns with their old versions,
|
||||
/// because their sizes couldn't change, since sizes of all nested subcolumns must be consistent.
|
||||
static std::optional<std::vector<ASTPtr>> getExpressionsOfUpdatedNestedSubcolumns(
|
||||
const String & column_name,
|
||||
const NamesAndTypesList & all_columns,
|
||||
const std::unordered_map<String, ASTPtr> & column_to_update_expression)
|
||||
{
|
||||
std::vector<ASTPtr> res;
|
||||
auto source_name = Nested::splitName(column_name).first;
|
||||
|
||||
/// Check this nested subcolumn
|
||||
for (const auto & column : all_columns)
|
||||
{
|
||||
auto split = Nested::splitName(column.name);
|
||||
if (isArray(column.type) && split.first == source_name && !split.second.empty())
|
||||
{
|
||||
auto it = column_to_update_expression.find(column.name);
|
||||
if (it == column_to_update_expression.end())
|
||||
return {};
|
||||
|
||||
res.push_back(it->second);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
{
|
||||
@ -398,7 +428,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
auto dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns);
|
||||
|
||||
/// First, break a sequence of commands into stages.
|
||||
for (const auto & command : commands)
|
||||
for (auto & command : commands)
|
||||
{
|
||||
if (command.type == MutationCommand::DELETE)
|
||||
{
|
||||
@ -438,12 +468,43 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
///
|
||||
/// Outer CAST is added just in case if we don't trust the returning type of 'if'.
|
||||
|
||||
auto type_literal = std::make_shared<ASTLiteral>(columns_desc.getPhysical(column).type->getName());
|
||||
const auto & type = columns_desc.getPhysical(column).type;
|
||||
auto type_literal = std::make_shared<ASTLiteral>(type->getName());
|
||||
|
||||
const auto & update_expr = kv.second;
|
||||
|
||||
ASTPtr condition = getPartitionAndPredicateExpressionForMutationCommand(command);
|
||||
|
||||
/// Add a validateNestedArraySizes check for Nested subcolumns
|
||||
if (isArray(type) && !Nested::splitName(column).second.empty())
|
||||
{
|
||||
std::shared_ptr<ASTFunction> function = nullptr;
|
||||
|
||||
auto nested_update_exprs = getExpressionsOfUpdatedNestedSubcolumns(column, all_columns, command.column_to_update_expression);
|
||||
if (!nested_update_exprs)
|
||||
{
|
||||
function = makeASTFunction("validateNestedArraySizes",
|
||||
condition,
|
||||
update_expr->clone(),
|
||||
std::make_shared<ASTIdentifier>(column));
|
||||
condition = makeASTFunction("and", condition, function);
|
||||
}
|
||||
else if (nested_update_exprs->size() > 1)
|
||||
{
|
||||
function = std::make_shared<ASTFunction>();
|
||||
function->name = "validateNestedArraySizes";
|
||||
function->arguments = std::make_shared<ASTExpressionList>();
|
||||
function->children.push_back(function->arguments);
|
||||
function->arguments->children.push_back(condition);
|
||||
for (const auto & it : *nested_update_exprs)
|
||||
function->arguments->children.push_back(it->clone());
|
||||
condition = makeASTFunction("and", condition, function);
|
||||
}
|
||||
}
|
||||
|
||||
auto updated_column = makeASTFunction("CAST",
|
||||
makeASTFunction("if",
|
||||
getPartitionAndPredicateExpressionForMutationCommand(command),
|
||||
condition,
|
||||
makeASTFunction("CAST",
|
||||
update_expr->clone(),
|
||||
type_literal),
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
|
||||
|
||||
namespace
|
||||
@ -13,7 +14,7 @@ using namespace DB;
|
||||
|
||||
Field executeFunctionOnField(
|
||||
const Field & field, const std::string & name,
|
||||
const ExpressionActionsPtr & expr,
|
||||
const ExpressionActionsPtr & sharding_expr,
|
||||
const std::string & sharding_key_column_name)
|
||||
{
|
||||
DataTypePtr type = applyVisitor(FieldToDataType{}, field);
|
||||
@ -25,17 +26,23 @@ Field executeFunctionOnField(
|
||||
|
||||
Block block{column};
|
||||
size_t num_rows = 1;
|
||||
expr->execute(block, num_rows);
|
||||
sharding_expr->execute(block, num_rows);
|
||||
|
||||
ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name);
|
||||
return (*ret.column)[0];
|
||||
}
|
||||
|
||||
/// Return true if shard may contain such value (or it is unknown), otherwise false.
|
||||
/// @param sharding_column_value - one of values from IN
|
||||
/// @param sharding_column_name - name of that column
|
||||
/// @param sharding_expr - expression of sharding_key for the Distributed() table
|
||||
/// @param sharding_key_column_name - name of the column for sharding_expr
|
||||
/// @param shard_info - info for the current shard (to compare shard_num with calculated)
|
||||
/// @param slots - weight -> shard mapping
|
||||
/// @return true if shard may contain such value (or it is unknown), otherwise false.
|
||||
bool shardContains(
|
||||
const Field & sharding_column_value,
|
||||
const std::string & sharding_column_name,
|
||||
const ExpressionActionsPtr & expr,
|
||||
const ExpressionActionsPtr & sharding_expr,
|
||||
const std::string & sharding_key_column_name,
|
||||
const Cluster::ShardInfo & shard_info,
|
||||
const Cluster::SlotToShard & slots)
|
||||
@ -45,7 +52,14 @@ bool shardContains(
|
||||
if (sharding_column_value.isNull())
|
||||
return false;
|
||||
|
||||
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, expr, sharding_key_column_name);
|
||||
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, sharding_expr, sharding_key_column_name);
|
||||
/// The value from IN can be non-numeric,
|
||||
/// but in this case it should be convertible to numeric type, let's try.
|
||||
sharding_value = convertFieldToType(sharding_value, DataTypeUInt64());
|
||||
/// In case of conversion is not possible (NULL), shard cannot contain the value anyway.
|
||||
if (sharding_value.isNull())
|
||||
return false;
|
||||
|
||||
UInt64 value = sharding_value.get<UInt64>();
|
||||
const auto shard_num = slots[value % slots.size()] + 1;
|
||||
return shard_info.shard_num == shard_num;
|
||||
@ -78,10 +92,10 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
|
||||
if (!identifier)
|
||||
return;
|
||||
|
||||
const auto & expr = data.sharding_key_expr;
|
||||
const auto & sharding_expr = data.sharding_key_expr;
|
||||
const auto & sharding_key_column_name = data.sharding_key_column_name;
|
||||
|
||||
if (!expr->getRequiredColumnsWithTypes().contains(identifier->name()))
|
||||
if (!sharding_expr->getRequiredColumnsWithTypes().contains(identifier->name()))
|
||||
return;
|
||||
|
||||
/// NOTE: that we should not take care about empty tuple,
|
||||
@ -93,7 +107,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
|
||||
std::erase_if(tuple_elements->children, [&](auto & child)
|
||||
{
|
||||
auto * literal = child->template as<ASTLiteral>();
|
||||
return literal && !shardContains(literal->value, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
|
||||
return literal && !shardContains(literal->value, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots);
|
||||
});
|
||||
}
|
||||
else if (auto * tuple_literal = right->as<ASTLiteral>();
|
||||
@ -102,7 +116,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
|
||||
auto & tuple = tuple_literal->value.get<Tuple &>();
|
||||
std::erase_if(tuple, [&](auto & child)
|
||||
{
|
||||
return !shardContains(child, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
|
||||
return !shardContains(child, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -12,8 +12,7 @@ namespace DB
|
||||
static bool isUniq(const ASTFunction & func)
|
||||
{
|
||||
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|
||||
|| func.name == "uniqCombined" || func.name == "uniqCombined64"
|
||||
|| func.name == "uniqThetaSketch";
|
||||
|| func.name == "uniqCombined" || func.name == "uniqCombined64";
|
||||
}
|
||||
|
||||
/// Remove injective functions of one argument: replace with a child
|
||||
|
@ -158,6 +158,8 @@ void TCPHandler::runImpl()
|
||||
}
|
||||
|
||||
Settings connection_settings = connection_context->getSettings();
|
||||
UInt64 idle_connection_timeout = connection_settings.idle_connection_timeout;
|
||||
UInt64 poll_interval = connection_settings.poll_interval;
|
||||
|
||||
sendHello();
|
||||
|
||||
@ -168,10 +170,10 @@ void TCPHandler::runImpl()
|
||||
/// We are waiting for a packet from the client. Thus, every `poll_interval` seconds check whether we need to shut down.
|
||||
{
|
||||
Stopwatch idle_time;
|
||||
while (!server.isCancelled() && !static_cast<ReadBufferFromPocoSocket &>(*in).poll(
|
||||
std::min(connection_settings.poll_interval, connection_settings.idle_connection_timeout) * 1000000))
|
||||
UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000;
|
||||
while (!server.isCancelled() && !static_cast<ReadBufferFromPocoSocket &>(*in).poll(timeout_ms))
|
||||
{
|
||||
if (idle_time.elapsedSeconds() > connection_settings.idle_connection_timeout)
|
||||
if (idle_time.elapsedSeconds() > idle_connection_timeout)
|
||||
{
|
||||
LOG_TRACE(log, "Closing idle connection");
|
||||
return;
|
||||
@ -212,6 +214,15 @@ void TCPHandler::runImpl()
|
||||
if (!receivePacket())
|
||||
continue;
|
||||
|
||||
/** If a Query packet was received, the settings in query_context have been updated.
|
||||
* So, update some other connection settings, for flexibility.
|
||||
*/
|
||||
{
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
idle_connection_timeout = settings.idle_connection_timeout;
|
||||
poll_interval = settings.poll_interval;
|
||||
}
|
||||
|
||||
/** If part_uuids got received in previous packet, trying to read again.
|
||||
*/
|
||||
if (state.empty() && state.part_uuids && !receivePacket())
|
||||
@ -274,10 +285,10 @@ void TCPHandler::runImpl()
|
||||
if (context != query_context)
|
||||
throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
size_t poll_interval;
|
||||
size_t poll_interval_ms;
|
||||
int receive_timeout;
|
||||
std::tie(poll_interval, receive_timeout) = getReadTimeouts(connection_settings);
|
||||
if (!readDataNext(poll_interval, receive_timeout))
|
||||
std::tie(poll_interval_ms, receive_timeout) = getReadTimeouts(connection_settings);
|
||||
if (!readDataNext(poll_interval_ms, receive_timeout))
|
||||
{
|
||||
state.block_in.reset();
|
||||
state.maybe_compressed_in.reset();
|
||||
@ -985,8 +996,6 @@ bool TCPHandler::receivePacket()
|
||||
|
||||
switch (packet_type)
|
||||
{
|
||||
case Protocol::Client::ReadTaskResponse:
|
||||
throw Exception("ReadTaskResponse must be received only after requesting in callback", ErrorCodes::LOGICAL_ERROR);
|
||||
case Protocol::Client::IgnoredPartUUIDs:
|
||||
/// Part uuids packet if any comes before query.
|
||||
receiveIgnoredPartUUIDs();
|
||||
|
@ -1107,13 +1107,13 @@ void IMergeTreeDataPart::remove(bool keep_s3) const
|
||||
{
|
||||
/// Remove each expected file in directory, then remove directory itself.
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
#endif
|
||||
for (const auto & [file, _] : checksums.files)
|
||||
volume->getDisk()->removeSharedFile(to + "/" + file, keep_s3);
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Compression/CompressionFactory.h>
|
||||
#include <Compression/CompressedReadBufferFromFile.h>
|
||||
#include <DataTypes/Serializations/ISerialization.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -393,8 +394,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name,
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type.getName());
|
||||
|
||||
auto disk = data_part->volume->getDisk();
|
||||
String mrk_path = fullPath(disk, part_path + name + marks_file_extension);
|
||||
String bin_path = fullPath(disk, part_path + name + DATA_FILE_EXTENSION);
|
||||
String escaped_name = escapeForFileName(name);
|
||||
String mrk_path = fullPath(disk, part_path + escaped_name + marks_file_extension);
|
||||
String bin_path = fullPath(disk, part_path + escaped_name + DATA_FILE_EXTENSION);
|
||||
DB::ReadBufferFromFile mrk_in(mrk_path);
|
||||
DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0, 0, nullptr);
|
||||
bool must_be_last = false;
|
||||
|
@ -71,7 +71,7 @@ Pipe StorageMySQL::read(
|
||||
SelectQueryInfo & query_info_,
|
||||
ContextPtr context_,
|
||||
QueryProcessingStage::Enum /*processed_stage*/,
|
||||
size_t max_block_size_,
|
||||
size_t /*max_block_size*/,
|
||||
unsigned)
|
||||
{
|
||||
metadata_snapshot->check(column_names_, getVirtuals(), getStorageID());
|
||||
@ -95,8 +95,10 @@ Pipe StorageMySQL::read(
|
||||
sample_block.insert({ column_data.type, column_data.name });
|
||||
}
|
||||
|
||||
|
||||
StreamSettings mysql_input_stream_settings(context_->getSettingsRef(), true, false);
|
||||
return Pipe(std::make_shared<SourceFromInputStream>(
|
||||
std::make_shared<MySQLWithFailoverBlockInputStream>(pool, query, sample_block, max_block_size_, /* auto_close = */ true)));
|
||||
std::make_shared<MySQLWithFailoverBlockInputStream>(pool, query, sample_block, mysql_input_stream_settings)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -166,6 +166,7 @@ StorageS3Source::StorageS3Source(
|
||||
ContextPtr context_,
|
||||
const ColumnsDescription & columns_,
|
||||
UInt64 max_block_size_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
const String compression_hint_,
|
||||
const std::shared_ptr<Aws::S3::S3Client> & client_,
|
||||
const String & bucket_,
|
||||
@ -177,6 +178,7 @@ StorageS3Source::StorageS3Source(
|
||||
, format(format_)
|
||||
, columns_desc(columns_)
|
||||
, max_block_size(max_block_size_)
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, compression_hint(compression_hint_)
|
||||
, client(client_)
|
||||
, sample_block(sample_block_)
|
||||
@ -197,7 +199,7 @@ bool StorageS3Source::initialize()
|
||||
file_path = bucket + "/" + current_key;
|
||||
|
||||
read_buf = wrapReadBufferWithCompressionMethod(
|
||||
std::make_unique<ReadBufferFromS3>(client, bucket, current_key), chooseCompressionMethod(current_key, compression_hint));
|
||||
std::make_unique<ReadBufferFromS3>(client, bucket, current_key, s3_max_single_read_retries), chooseCompressionMethod(current_key, compression_hint));
|
||||
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, getContext(), max_block_size);
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(input_format);
|
||||
|
||||
@ -312,6 +314,7 @@ StorageS3::StorageS3(
|
||||
const String & secret_access_key_,
|
||||
const StorageID & table_id_,
|
||||
const String & format_name_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
UInt64 min_upload_part_size_,
|
||||
UInt64 max_single_part_upload_size_,
|
||||
UInt64 max_connections_,
|
||||
@ -323,6 +326,7 @@ StorageS3::StorageS3(
|
||||
: IStorage(table_id_)
|
||||
, client_auth{uri_, access_key_id_, secret_access_key_, max_connections_, {}, {}} /// Client and settings will be updated later
|
||||
, format_name(format_name_)
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
, min_upload_part_size(min_upload_part_size_)
|
||||
, max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, compression_method(compression_method_)
|
||||
@ -389,6 +393,7 @@ Pipe StorageS3::read(
|
||||
local_context,
|
||||
metadata_snapshot->getColumns(),
|
||||
max_block_size,
|
||||
s3_max_single_read_retries,
|
||||
compression_method,
|
||||
client_auth.client,
|
||||
client_auth.uri.bucket,
|
||||
@ -474,6 +479,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory)
|
||||
secret_access_key = engine_args[2]->as<ASTLiteral &>().value.safeGet<String>();
|
||||
}
|
||||
|
||||
UInt64 s3_max_single_read_retries = args.getLocalContext()->getSettingsRef().s3_max_single_read_retries;
|
||||
UInt64 min_upload_part_size = args.getLocalContext()->getSettingsRef().s3_min_upload_part_size;
|
||||
UInt64 max_single_part_upload_size = args.getLocalContext()->getSettingsRef().s3_max_single_part_upload_size;
|
||||
UInt64 max_connections = args.getLocalContext()->getSettingsRef().s3_max_connections;
|
||||
@ -497,6 +503,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory)
|
||||
secret_access_key,
|
||||
args.table_id,
|
||||
format_name,
|
||||
s3_max_single_read_retries,
|
||||
min_upload_part_size,
|
||||
max_single_part_upload_size,
|
||||
max_connections,
|
||||
|
@ -55,6 +55,7 @@ public:
|
||||
ContextPtr context_,
|
||||
const ColumnsDescription & columns_,
|
||||
UInt64 max_block_size_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
const String compression_hint_,
|
||||
const std::shared_ptr<Aws::S3::S3Client> & client_,
|
||||
const String & bucket,
|
||||
@ -71,6 +72,7 @@ private:
|
||||
String format;
|
||||
ColumnsDescription columns_desc;
|
||||
UInt64 max_block_size;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
String compression_hint;
|
||||
std::shared_ptr<Aws::S3::S3Client> client;
|
||||
Block sample_block;
|
||||
@ -100,6 +102,7 @@ public:
|
||||
const String & secret_access_key,
|
||||
const StorageID & table_id_,
|
||||
const String & format_name_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
UInt64 min_upload_part_size_,
|
||||
UInt64 max_single_part_upload_size_,
|
||||
UInt64 max_connections_,
|
||||
@ -145,6 +148,7 @@ private:
|
||||
ClientAuthentificaiton client_auth;
|
||||
|
||||
String format_name;
|
||||
UInt64 s3_max_single_read_retries;
|
||||
size_t min_upload_part_size;
|
||||
size_t max_single_part_upload_size;
|
||||
String compression_method;
|
||||
|
@ -17,6 +17,7 @@ const char * auto_contributors[] {
|
||||
"Aleksei Semiglazov",
|
||||
"Aleksey",
|
||||
"Aleksey Akulovich",
|
||||
"Alex",
|
||||
"Alex Bocharov",
|
||||
"Alex Karo",
|
||||
"Alex Krash",
|
||||
@ -144,6 +145,7 @@ const char * auto_contributors[] {
|
||||
"Chao Wang",
|
||||
"Chen Yufei",
|
||||
"Chienlung Cheung",
|
||||
"Christian",
|
||||
"Ciprian Hacman",
|
||||
"Clement Rodriguez",
|
||||
"Clément Rodriguez",
|
||||
@ -175,6 +177,7 @@ const char * auto_contributors[] {
|
||||
"Dmitry Belyavtsev",
|
||||
"Dmitry Bilunov",
|
||||
"Dmitry Galuza",
|
||||
"Dmitry Krylov",
|
||||
"Dmitry Luhtionov",
|
||||
"Dmitry Moskowski",
|
||||
"Dmitry Muzyka",
|
||||
@ -185,6 +188,7 @@ const char * auto_contributors[] {
|
||||
"Dongdong Yang",
|
||||
"DoomzD",
|
||||
"Dr. Strange Looker",
|
||||
"Egor O'Sten",
|
||||
"Ekaterina",
|
||||
"Eldar Zaitov",
|
||||
"Elena Baskakova",
|
||||
@ -286,6 +290,7 @@ const char * auto_contributors[] {
|
||||
"Jochen Schalanda",
|
||||
"John",
|
||||
"John Hummel",
|
||||
"John Skopis",
|
||||
"Jonatas Freitas",
|
||||
"Kang Liu",
|
||||
"Karl Pietrzak",
|
||||
@ -395,6 +400,7 @@ const char * auto_contributors[] {
|
||||
"NeZeD [Mac Pro]",
|
||||
"Neeke Gao",
|
||||
"Neng Liu",
|
||||
"Nickolay Yastrebov",
|
||||
"Nico Mandery",
|
||||
"Nico Piderman",
|
||||
"Nicolae Vartolomei",
|
||||
@ -472,6 +478,7 @@ const char * auto_contributors[] {
|
||||
"Sami Kerola",
|
||||
"Samuel Chou",
|
||||
"Saulius Valatka",
|
||||
"Serg Kulakov",
|
||||
"Serge Rider",
|
||||
"Sergei Bocharov",
|
||||
"Sergei Semin",
|
||||
@ -606,6 +613,7 @@ const char * auto_contributors[] {
|
||||
"abyss7",
|
||||
"achimbab",
|
||||
"achulkov2",
|
||||
"adevyatova",
|
||||
"ageraab",
|
||||
"akazz",
|
||||
"akonyaev",
|
||||
@ -631,6 +639,7 @@ const char * auto_contributors[] {
|
||||
"artpaul",
|
||||
"asiana21",
|
||||
"avasiliev",
|
||||
"avogar",
|
||||
"avsharapov",
|
||||
"awesomeleo",
|
||||
"benamazing",
|
||||
@ -647,6 +656,8 @@ const char * auto_contributors[] {
|
||||
"centos7",
|
||||
"champtar",
|
||||
"chang.chen",
|
||||
"changvvb",
|
||||
"chasingegg",
|
||||
"chengy8934",
|
||||
"chenqi",
|
||||
"chenxing-xc",
|
||||
@ -769,6 +780,7 @@ const char * auto_contributors[] {
|
||||
"maxim-babenko",
|
||||
"maxkuzn",
|
||||
"maxulan",
|
||||
"mehanizm",
|
||||
"melin",
|
||||
"memo",
|
||||
"meo",
|
||||
@ -831,6 +843,7 @@ const char * auto_contributors[] {
|
||||
"shangshujie",
|
||||
"shedx",
|
||||
"simon-says",
|
||||
"songenjie",
|
||||
"spff",
|
||||
"spongedc",
|
||||
"spyros87",
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
|
||||
#include <Processors/QueryPipeline.h>
|
||||
|
||||
#if !__clang__
|
||||
#if !defined(__clang__)
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#endif
|
||||
|
@ -79,7 +79,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr
|
||||
ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context) const
|
||||
{
|
||||
const auto & settings = context->getSettingsRef();
|
||||
const auto tables_and_columns = fetchTablesColumnsList(*pool, remote_database_name, {remote_table_name}, settings.external_table_functions_use_nulls, settings.mysql_datatypes_support_level);
|
||||
const auto tables_and_columns = fetchTablesColumnsList(*pool, remote_database_name, {remote_table_name}, settings, settings.mysql_datatypes_support_level);
|
||||
|
||||
const auto columns = tables_and_columns.find(remote_table_name);
|
||||
if (columns == tables_and_columns.end())
|
||||
|
@ -83,6 +83,7 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
|
||||
{
|
||||
Poco::URI uri (filename);
|
||||
S3::URI s3_uri (uri);
|
||||
UInt64 s3_max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries;
|
||||
UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size;
|
||||
UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size;
|
||||
UInt64 max_connections = context->getSettingsRef().s3_max_connections;
|
||||
@ -93,6 +94,7 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
|
||||
secret_access_key,
|
||||
StorageID(getDatabaseName(), table_name),
|
||||
format,
|
||||
s3_max_single_read_retries,
|
||||
min_upload_part_size,
|
||||
max_single_part_upload_size,
|
||||
max_connections,
|
||||
|
@ -109,12 +109,17 @@ StoragePtr TableFunctionS3Cluster::executeImpl(
|
||||
Poco::URI uri (filename);
|
||||
S3::URI s3_uri (uri);
|
||||
/// Actually these parameters are not used
|
||||
UInt64 s3_max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries;
|
||||
UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size;
|
||||
UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size;
|
||||
UInt64 max_connections = context->getSettingsRef().s3_max_connections;
|
||||
storage = StorageS3::create(
|
||||
s3_uri, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name),
|
||||
format, min_upload_part_size, max_single_part_upload_size, max_connections,
|
||||
format,
|
||||
s3_max_single_read_retries,
|
||||
min_upload_part_size,
|
||||
max_single_part_upload_size,
|
||||
max_connections,
|
||||
getActualTableStructure(context), ConstraintsDescription{},
|
||||
context, compression_method, /*distributed_processing=*/true);
|
||||
}
|
||||
|
@ -5,6 +5,8 @@
|
||||
<load_balancing>in_order</load_balancing>
|
||||
<hedged_connection_timeout_ms>100</hedged_connection_timeout_ms>
|
||||
<receive_data_timeout_ms>2000</receive_data_timeout_ms>
|
||||
<async_socket_for_remote>1</async_socket_for_remote>
|
||||
<use_hedged_requests>1</use_hedged_requests>
|
||||
</default>
|
||||
</profiles>
|
||||
</yandex>
|
||||
|
@ -6,6 +6,8 @@
|
||||
<max_parallel_replicas>2</max_parallel_replicas>
|
||||
<hedged_connection_timeout_ms>100</hedged_connection_timeout_ms>
|
||||
<receive_data_timeout_ms>2000</receive_data_timeout_ms>
|
||||
<async_socket_for_remote>1</async_socket_for_remote>
|
||||
<use_hedged_requests>1</use_hedged_requests>
|
||||
</default>
|
||||
</profiles>
|
||||
</yandex>
|
||||
|
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<profiles>
|
||||
<default>
|
||||
<allow_experimental_database_materialize_mysql>1</allow_experimental_database_materialize_mysql>
|
||||
<default_database_engine>Atomic</default_database_engine>
|
||||
<external_storage_max_read_rows>1</external_storage_max_read_rows>
|
||||
<external_storage_max_read_bytes>0</external_storage_max_read_bytes>
|
||||
</default>
|
||||
</profiles>
|
||||
|
||||
<users>
|
||||
<default>
|
||||
<password></password>
|
||||
<networks incl="networks" replace="replace">
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
<profile>default</profile>
|
||||
</default>
|
||||
</users>
|
||||
</yandex>
|
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<profiles>
|
||||
<default>
|
||||
<allow_experimental_database_materialize_mysql>1</allow_experimental_database_materialize_mysql>
|
||||
<default_database_engine>Atomic</default_database_engine>
|
||||
<external_storage_max_read_rows>0</external_storage_max_read_rows>
|
||||
<external_storage_max_read_bytes>1</external_storage_max_read_bytes>
|
||||
</default>
|
||||
</profiles>
|
||||
|
||||
<users>
|
||||
<default>
|
||||
<password></password>
|
||||
<networks incl="networks" replace="replace">
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
<profile>default</profile>
|
||||
</default>
|
||||
</users>
|
||||
</yandex>
|
@ -842,3 +842,19 @@ def system_tables_test(clickhouse_node, mysql_node, service_name):
|
||||
mysql_node.query("CREATE TABLE system_tables_test.test (id int NOT NULL PRIMARY KEY) ENGINE=InnoDB")
|
||||
clickhouse_node.query("CREATE DATABASE system_tables_test ENGINE = MaterializeMySQL('{}:3306', 'system_tables_test', 'root', 'clickhouse')".format(service_name))
|
||||
check_query(clickhouse_node, "SELECT partition_key, sorting_key, primary_key FROM system.tables WHERE database = 'system_tables_test' AND name = 'test'", "intDiv(id, 4294967)\tid\tid\n")
|
||||
|
||||
def mysql_settings_test(clickhouse_node, mysql_node, service_name):
    """Replicate a two-row MySQL table via MaterializeMySQL and check how it is read back.

    Creates `test_database.a` on the MySQL side with two rows, attaches it on the
    ClickHouse side as a MaterializeMySQL database reachable at `<service_name>:3306`,
    then verifies that both rows arrive and that they are spread over two distinct
    blocks. Both databases are dropped again at the end.

    NOTE(review): the two-block expectation presumably relies on the
    external_storage_max_read_* limits in the node's user profile -- confirm against
    the configs used by the caller.
    """
    # Start from a clean slate on both servers (MySQL first, then ClickHouse).
    for node in (mysql_node, clickhouse_node):
        node.query("DROP DATABASE IF EXISTS test_database")

    # Source schema and data on the MySQL side.
    mysql_setup = (
        "CREATE DATABASE test_database",
        "CREATE TABLE test_database.a (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))",
        "INSERT INTO test_database.a VALUES(1, 'foo')",
        "INSERT INTO test_database.a VALUES(2, 'bar')",
    )
    for statement in mysql_setup:
        mysql_node.query(statement)

    create_replica = "CREATE DATABASE test_database ENGINE = MaterializeMySQL('{}:3306', 'test_database', 'root', 'clickhouse')".format(service_name)
    clickhouse_node.query(create_replica)
    check_query(clickhouse_node, "SELECT COUNT() FROM test_database.a FORMAT TSV", "2\n")

    # Each of the two rows is expected to come from its own block.
    distinct_blocks = clickhouse_node.query("SELECT COUNT(DISTINCT blockNumber()) FROM test_database.a FORMAT TSV")
    assert distinct_blocks == "2\n"

    # Tear down in the same order as the original: ClickHouse, then MySQL.
    clickhouse_node.query("DROP DATABASE test_database")
    mysql_node.query("DROP DATABASE test_database")
|
||||
|
@ -16,7 +16,8 @@ cluster = ClickHouseCluster(__file__)
|
||||
|
||||
node_db_ordinary = cluster.add_instance('node1', user_configs=["configs/users.xml"], with_mysql=False, stay_alive=True)
|
||||
node_db_atomic = cluster.add_instance('node2', user_configs=["configs/users_db_atomic.xml"], with_mysql=False, stay_alive=True)
|
||||
|
||||
node_disable_bytes_settings = cluster.add_instance('node3', user_configs=["configs/users_disable_bytes_settings.xml"], with_mysql=False, stay_alive=True)
|
||||
node_disable_rows_settings = cluster.add_instance('node4', user_configs=["configs/users_disable_rows_settings.xml"], with_mysql=False, stay_alive=True)
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
@ -289,5 +290,12 @@ def test_multi_table_update(started_cluster, started_mysql_8_0, started_mysql_5_
|
||||
|
||||
|
||||
# The parameter list used to name node_db_ordinary twice, so the Atomic-engine node
# was never exercised; run the check once per database engine instead.
@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
def test_system_tables_table(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
    """Run materialize_with_ddl.system_tables_test against MySQL 5.7 and MySQL 8.0.

    `clickhouse_node` is supplied by the parametrization above; the MySQL fixtures are
    passed together with the service names ("mysql1", "mysql8_0") that the helper uses
    to build the MaterializeMySQL connection string.
    """
    materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_5_7, "mysql1")
    materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_8_0, "mysql8_0")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(('clickhouse_node'), [node_disable_bytes_settings, node_disable_rows_settings])
def test_mysql_settings(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
    """Run materialize_with_ddl.mysql_settings_test against MySQL 5.7, then MySQL 8.0.

    `clickhouse_node` is one of the two nodes listed in the parametrization; each MySQL
    fixture is paired with the service name the helper uses to reach it.
    """
    mysql_targets = (
        (started_mysql_5_7, "mysql1"),
        (started_mysql_8_0, "mysql8_0"),
    )
    for mysql_node, service_name in mysql_targets:
        materialize_with_ddl.mysql_settings_test(clickhouse_node, mysql_node, service_name)
|
||||
|
@ -64,7 +64,7 @@ def test(started_cluster):
|
||||
assert end - start < 10
|
||||
|
||||
start = time.time()
|
||||
error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, send_timeout=5, use_hedged_requests=0;')
|
||||
error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, send_timeout=5, use_hedged_requests=0, async_socket_for_remote=1;')
|
||||
end = time.time()
|
||||
|
||||
assert end - start < 10
|
||||
@ -73,7 +73,7 @@ def test(started_cluster):
|
||||
assert error.find('DB::ReadBufferFromPocoSocket::nextImpl()') == -1
|
||||
|
||||
start = time.time()
|
||||
error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, send_timeout=5;')
|
||||
error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, send_timeout=5, use_hedged_requests=1, async_socket_for_remote=1;')
|
||||
end = time.time()
|
||||
|
||||
assert end - start < 10
|
||||
|
18
tests/integration/test_storage_mysql/configs/users.xml
Normal file
18
tests/integration/test_storage_mysql/configs/users.xml
Normal file
@ -0,0 +1,18 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<profiles>
|
||||
<default>
|
||||
<max_block_size>2</max_block_size>
|
||||
</default>
|
||||
</profiles>
|
||||
|
||||
<users>
|
||||
<default>
|
||||
<password></password>
|
||||
<networks incl="networks" replace="replace">
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
<profile>default</profile>
|
||||
</default>
|
||||
</users>
|
||||
</yandex>
|
@ -9,6 +9,7 @@ cluster = ClickHouseCluster(__file__)
|
||||
|
||||
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_mysql=True)
|
||||
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_mysql_cluster=True)
|
||||
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'], with_mysql=True)
|
||||
|
||||
create_table_sql_template = """
|
||||
CREATE TABLE `clickhouse`.`{}` (
|
||||
@ -260,6 +261,25 @@ def test_mysql_distributed(started_cluster):
|
||||
assert(result == 'host2\nhost4\n' or result == 'host3\nhost4\n')
|
||||
|
||||
|
||||
def test_external_settings(started_cluster):
    """Check that reads from a MySQL-engine table on node3 are split into small blocks.

    node3 is started with configs/users.xml. The test creates a MySQL-backed table,
    inserts 100 rows through ClickHouse, verifies the row count and the sum of `money`,
    checks the two settings values it depends on, and finally expects the 100 rows to
    be read back as 50 distinct blocks.
    """
    table_name = 'test_external_settings'
    conn = get_mysql_conn()
    try:
        create_mysql_table(conn, table_name)

        node3.query('''
CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql1:3306', 'clickhouse', '{}', 'root', 'clickhouse');
'''.format(table_name, table_name))
        node3.query(
            "INSERT INTO {}(id, name, money) select number, concat('name_', toString(number)), 3 from numbers(100) ".format(
                table_name))
        assert node3.query("SELECT count() FROM {}".format(table_name)).rstrip() == '100'
        assert node3.query("SELECT sum(money) FROM {}".format(table_name)).rstrip() == '300'

        # These two comparisons were previously evaluated and thrown away (no `assert`),
        # so a wrong profile would only surface indirectly in the block-count check below.
        assert node3.query("select value from system.settings where name = 'max_block_size' FORMAT TSV") == "2\n"
        assert node3.query("select value from system.settings where name = 'external_storage_max_read_rows' FORMAT TSV") == "0\n"

        # 100 rows read in blocks of max_block_size = 2 rows -> 50 distinct blocks.
        assert node3.query("SELECT COUNT(DISTINCT blockNumber()) FROM {} FORMAT TSV".format(table_name)) == '50\n'
    finally:
        # Release the MySQL connection even when one of the assertions above fails.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
with contextmanager(started_cluster)() as cluster:
|
||||
for name, instance in list(cluster.instances.items()):
|
||||
|
@ -1,3 +1,5 @@
|
||||
import sys
|
||||
|
||||
from bottle import abort, route, run, request, response
|
||||
|
||||
|
||||
@ -21,4 +23,4 @@ def ping():
|
||||
return 'OK'
|
||||
|
||||
|
||||
run(host='0.0.0.0', port=8080)
|
||||
run(host='0.0.0.0', port=int(sys.argv[1]))
|
@ -0,0 +1,90 @@
|
||||
import http.server
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import struct
|
||||
import sys
|
||||
|
||||
|
||||
def gen_n_digit_number(n):
    """Return a random integer that has exactly `n` decimal digits.

    `n` must satisfy 0 < n < 19; anything else trips the assertion. The value is
    drawn with a single `random.randint` call over the inclusive range
    [10**(n-1), 10**n - 1].
    """
    assert 0 < n < 19
    smallest = 10**(n-1)
    largest = 10**n-1
    return random.randint(smallest, largest)
|
||||
|
||||
|
||||
def gen_line():
    """Build one CSV row of the mock dataset and return it as encoded bytes.

    A row has four comma-separated integer columns and ends with a newline. The
    column at index `columns // 2` is the constant 1; every other column is a random
    number whose digit count (1..18) is itself drawn at random.
    """
    columns = 4
    constant_position = columns // 2

    row = []
    for position in range(columns):
        if position == constant_position:
            row.append(1)
        else:
            # Same draw order as before: digit count first, then the number itself.
            digits = random.randint(1, 18)
            row.append(gen_n_digit_number(digits))

    text = ",".join(str(value) for value in row) + "\n"
    return text.encode()
|
||||
|
||||
|
||||
# Seed the generator with a fixed string so every start of the server produces the
# same payload, then pre-build the whole 500000-row CSV body in memory.
random.seed("Unstable server/1.0")
lines = b"".join([gen_line() for _ in range(500000)])
|
||||
|
||||
|
||||
class RequestHandler(http.server.BaseHTTPRequestHandler):
    """HTTP handler that serves the generated CSV but stops sending part-way through.

    Routes:
      * ``/root/test.csv`` -- the pre-built ``lines`` payload, with byte-range support.
        A GET writes the body in ``send_block_size`` chunks and stops after
        ``stop_at`` chunks even though Content-Length announced the full range.
      * ``/`` -- answers 200 and, for GET, the body ``OK``.
      * anything else -- 404.
    """

    def do_HEAD(self):
        """Send status line and headers; for the CSV route also record the byte window.

        For ``/root/test.csv`` this sets ``from_bytes``, ``end_bytes`` (exclusive),
        ``size``, ``send_block_size`` and ``stop_at`` on the handler, which
        ``do_GET`` reads afterwards.
        """
        if self.path == "/root/test.csv":
            self.from_bytes = 0
            self.end_bytes = len(lines)
            self.size = self.end_bytes
            self.send_block_size = 256
            # Number of send_block_size-byte chunks written before do_GET gives up,
            # i.e. somewhere between 900000 and 1200000 bytes of payload.
            self.stop_at = random.randint(900000, 1200000) // self.send_block_size

            if "Range" in self.headers:
                cr = self.headers["Range"]
                # Split on space, '-', '/' and '='. The hyphen is escaped on purpose:
                # unescaped, "[ -/=]" is the character range from ' ' to '/'.
                parts = re.split(r"[ \-/=]+", cr)
                assert parts[0] == "bytes"
                self.from_bytes = int(parts[1])
                # An open-ended range ("bytes=N-") leaves the end at the payload size.
                if len(parts) > 2 and parts[2]:
                    # The header's end is inclusive; never promise more than exists.
                    self.end_bytes = min(int(parts[2]) + 1, self.size)
                self.send_response(206)
                self.send_header("Content-Range", f"bytes {self.from_bytes}-{self.end_bytes-1}/{self.size}")
            else:
                self.send_response(200)

            self.send_header("Accept-Ranges", "bytes")
            self.send_header("Content-Type", "text/plain")
            self.send_header("Content-Length", f"{self.end_bytes-self.from_bytes}")
            self.end_headers()

        elif self.path == "/":
            self.send_response(200)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()

        else:
            self.send_response(404)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()

    def do_GET(self):
        """Send the headers via do_HEAD, then the body for the route (if it has one)."""
        self.do_HEAD()
        if self.path == "/root/test.csv":
            for c, i in enumerate(range(self.from_bytes, self.end_bytes, self.send_block_size)):
                self.wfile.write(lines[i:min(i+self.send_block_size, self.end_bytes)])
                if (c + 1) % self.stop_at == 0:
                    # Stop writing although Content-Length announced more data.
                    # NOTE(review): presumably the client then sees a truncated
                    # response once the handler returns -- confirm.
                    print('Dropping connection')
                    break

        elif self.path == "/":
            self.wfile.write(b"OK")
|
||||
|
||||
|
||||
# Listen on all interfaces; the port number is the first command-line argument.
listen_address = ("0.0.0.0", int(sys.argv[1]))
httpd = http.server.HTTPServer(listen_address, RequestHandler)
httpd.serve_forever()
|
@ -96,7 +96,7 @@ def cluster():
|
||||
|
||||
prepare_s3_bucket(cluster)
|
||||
logging.info("S3 bucket created")
|
||||
run_s3_mock(cluster)
|
||||
run_s3_mocks(cluster)
|
||||
|
||||
yield cluster
|
||||
finally:
|
||||
@ -384,26 +384,32 @@ def test_s3_glob_scheherazade(cluster):
|
||||
assert run_query(instance, query).splitlines() == ["1001\t1001\t1001\t1001"]
|
||||
|
||||
|
||||
def run_s3_mock(cluster):
|
||||
logging.info("Starting s3 mock")
|
||||
container_id = cluster.get_container_id('resolver')
|
||||
current_dir = os.path.dirname(__file__)
|
||||
cluster.copy_file_to_container(container_id, os.path.join(current_dir, "s3_mock", "mock_s3.py"), "mock_s3.py")
|
||||
cluster.exec_in_container(container_id, ["python", "mock_s3.py"], detach=True)
|
||||
def run_s3_mocks(cluster):
|
||||
logging.info("Starting s3 mocks")
|
||||
mocks = (
|
||||
("mock_s3.py", "resolver", "8080"),
|
||||
("unstable_server.py", "resolver", "8081"),
|
||||
)
|
||||
for mock_filename, container, port in mocks:
|
||||
container_id = cluster.get_container_id(container)
|
||||
current_dir = os.path.dirname(__file__)
|
||||
cluster.copy_file_to_container(container_id, os.path.join(current_dir, "s3_mocks", mock_filename), mock_filename)
|
||||
cluster.exec_in_container(container_id, ["python", mock_filename, port], detach=True)
|
||||
|
||||
# Wait for S3 mock start
|
||||
for attempt in range(10):
|
||||
ping_response = cluster.exec_in_container(cluster.get_container_id('resolver'),
|
||||
["curl", "-s", "http://resolver:8080/"], nothrow=True)
|
||||
if ping_response != 'OK':
|
||||
if attempt == 9:
|
||||
assert ping_response == 'OK', 'Expected "OK", but got "{}"'.format(ping_response)
|
||||
# Wait for S3 mocks to start
|
||||
for mock_filename, container, port in mocks:
|
||||
for attempt in range(10):
|
||||
ping_response = cluster.exec_in_container(cluster.get_container_id(container),
|
||||
["curl", "-s", f"http://{container}:{port}/"], nothrow=True)
|
||||
if ping_response != 'OK':
|
||||
if attempt == 9:
|
||||
assert ping_response == 'OK', 'Expected "OK", but got "{}"'.format(ping_response)
|
||||
else:
|
||||
time.sleep(1)
|
||||
else:
|
||||
time.sleep(1)
|
||||
else:
|
||||
break
|
||||
break
|
||||
|
||||
logging.info("S3 mock started")
|
||||
logging.info("S3 mocks started")
|
||||
|
||||
|
||||
def replace_config(old, new):
|
||||
@ -523,6 +529,15 @@ def test_storage_s3_get_gzip(cluster, extension, method):
|
||||
run_query(instance, f"DROP TABLE {name}")
|
||||
|
||||
|
||||
def test_storage_s3_get_unstable(cluster):
    """Read test.csv through the mock on resolver:8081 and check the aggregate result.

    The query counts the rows and sums `column3`; the expected CSV output is a single
    line "500000,500000".
    """
    bucket = cluster.minio_bucket
    instance = cluster.instances["dummy"]
    table_format = "column1 Int64, column2 Int64, column3 Int64, column4 Int64"
    # `bucket` was previously assigned and then ignored in favour of a second
    # cluster.minio_bucket lookup; the resulting query text is unchanged.
    get_query = f"SELECT count(), sum(column3) FROM s3('http://resolver:8081/{bucket}/test.csv', 'CSV', '{table_format}') FORMAT CSV"
    result = run_query(instance, get_query)
    assert result.splitlines() == ["500000,500000"]
|
||||
|
||||
|
||||
def test_storage_s3_put_uncompressed(cluster):
|
||||
bucket = cluster.minio_bucket
|
||||
instance = cluster.instances["dummy"]
|
||||
|
@ -6,7 +6,9 @@
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>group_scale</name>
|
||||
<value>1000000</value>
|
||||
<values>
|
||||
<value>1000000</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
|
@ -4,13 +4,31 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
common_opts=(
|
||||
"--format=Null"
|
||||
# We check that even if max_threads is small, the setting max_distributed_connections
|
||||
# will allow to process queries on multiple shards concurrently.
|
||||
|
||||
"--max_threads=1"
|
||||
"--max_distributed_connections=3"
|
||||
)
|
||||
# We do sleep 1.5 seconds on ten machines.
|
||||
# If concurrency is one (bad) the query will take at least 15 seconds and the following loops are guaranteed to be infinite.
|
||||
# If concurrency is 10 (good), the query may take less than 10 seconds with non-zero probability
|
||||
# and the following loops will finish with probability 1 assuming independent random variables.
|
||||
|
||||
# NOTE: the test uses a higher timeout to avoid flakiness.
|
||||
timeout 9s ${CLICKHOUSE_CLIENT} "$@" "${common_opts[@]}" -q "select sleep(3) from remote('127.{1,2,3,4,5}', system.one)" --prefer_localhost_replica=0
|
||||
timeout 9s ${CLICKHOUSE_CLIENT} "$@" "${common_opts[@]}" -q "select sleep(3) from remote('127.{1,2,3,4,5}', system.one)" --prefer_localhost_replica=1
|
||||
while true; do
|
||||
timeout 10 ${CLICKHOUSE_CLIENT} --max_threads 1 --max_distributed_connections 10 --query "
|
||||
SELECT sleep(1.5) FROM remote('127.{1..10}', system.one) FORMAT Null" --prefer_localhost_replica=0 && break
|
||||
done
|
||||
|
||||
while true; do
|
||||
timeout 10 ${CLICKHOUSE_CLIENT} --max_threads 1 --max_distributed_connections 10 --query "
|
||||
SELECT sleep(1.5) FROM remote('127.{1..10}', system.one) FORMAT Null" --prefer_localhost_replica=1 && break
|
||||
done
|
||||
|
||||
# If max_distributed_connections is low and async_socket_for_remote is disabled,
|
||||
# the concurrency of distributed queries will be also low.
|
||||
|
||||
timeout 1 ${CLICKHOUSE_CLIENT} --max_threads 1 --max_distributed_connections 1 --async_socket_for_remote 0 --query "
|
||||
SELECT sleep(0.15) FROM remote('127.{1..10}', system.one) FORMAT Null" --prefer_localhost_replica=0 && echo 'Fail'
|
||||
|
||||
timeout 1 ${CLICKHOUSE_CLIENT} --max_threads 1 --max_distributed_connections 1 --async_socket_for_remote 0 --query "
|
||||
SELECT sleep(0.15) FROM remote('127.{1..10}', system.one) FORMAT Null" --prefer_localhost_replica=1 && echo 'Fail'
|
||||
|
||||
echo 'Ok'
|
||||
|
@ -15,13 +15,13 @@ drop table if exists simple;
|
||||
create table simple (i int, j int) engine = MergeTree order by i
|
||||
settings index_granularity = 1, max_concurrent_queries = 1, min_marks_to_honor_max_concurrent_queries = 2;
|
||||
|
||||
insert into simple select number, number + 100 from numbers(1000);
|
||||
insert into simple select number, number + 100 from numbers(5000);
|
||||
"
|
||||
|
||||
query_id="long_running_query-$CLICKHOUSE_DATABASE"
|
||||
|
||||
echo "Spin up a long running query"
|
||||
${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "$query_id" > /dev/null 2>&1 &
|
||||
${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.1) from simple settings max_block_size = 1 format Null" --query_id "$query_id" > /dev/null 2>&1 &
|
||||
wait_for_query_to_start "$query_id"
|
||||
|
||||
# query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled
|
||||
|
@ -17,6 +17,9 @@ others
|
||||
0
|
||||
0
|
||||
0
|
||||
different types -- prohibited
|
||||
different types -- conversion
|
||||
0
|
||||
optimize_skip_unused_shards_limit
|
||||
0
|
||||
0
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
drop table if exists dist_01756;
|
||||
drop table if exists dist_01756_str;
|
||||
drop table if exists dist_01756_column;
|
||||
drop table if exists data_01756_str;
|
||||
|
||||
-- SELECT
|
||||
@ -90,8 +91,10 @@ select * from dist_01756 where dummy in (0); -- { serverError 507 }
|
||||
-- optimize_skip_unused_shards does not support non-constants
|
||||
select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 }
|
||||
select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 }
|
||||
-- wrong type
|
||||
-- wrong type (tuple)
|
||||
select * from dist_01756 where dummy in ('0'); -- { serverError 507 }
|
||||
-- intHash64 does not accept string
|
||||
select * from dist_01756 where dummy in ('0', '2'); -- { serverError 43 }
|
||||
-- NOT IN is not supported
|
||||
select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 }
|
||||
|
||||
@ -110,6 +113,7 @@ select (2 IN (2,)), * from dist_01756 where dummy in (0, 2) format Null;
|
||||
select (dummy IN (toUInt8(2),)), * from dist_01756 where dummy in (0, 2) format Null;
|
||||
|
||||
-- different type
|
||||
select 'different types -- prohibited';
|
||||
create table data_01756_str (key String) engine=Memory();
|
||||
create table dist_01756_str as data_01756_str engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01756_str, cityHash64(key));
|
||||
select * from dist_01756_str where key in ('0', '2');
|
||||
@ -117,6 +121,12 @@ select * from dist_01756_str where key in ('0', Null); -- { serverError 507 }
|
||||
select * from dist_01756_str where key in (0, 2); -- { serverError 53 }
|
||||
select * from dist_01756_str where key in (0, Null); -- { serverError 53 }
|
||||
|
||||
-- different type #2
|
||||
select 'different types -- conversion';
|
||||
create table dist_01756_column as system.one engine=Distributed(test_cluster_two_shards, system, one, dummy);
|
||||
select * from dist_01756_column where dummy in (0, '255');
|
||||
select * from dist_01756_column where dummy in (0, '255foo'); -- { serverError 53 }
|
||||
|
||||
-- optimize_skip_unused_shards_limit
|
||||
select 'optimize_skip_unused_shards_limit';
|
||||
select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1; -- { serverError 507 }
|
||||
@ -124,4 +134,5 @@ select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_sha
|
||||
|
||||
drop table dist_01756;
|
||||
drop table dist_01756_str;
|
||||
drop table dist_01756_column;
|
||||
drop table data_01756_str;
|
||||
|
@ -10,7 +10,7 @@ CREATE TABLE foo_merge as foo ENGINE=Merge(currentDatabase(), '^foo');
|
||||
CREATE TABLE t2 (Id Int32, Val Int32, X Int32) Engine=Memory;
|
||||
INSERT INTO t2 values (4, 3, 4);
|
||||
|
||||
SET force_primary_key = 1;
|
||||
SET force_primary_key = 1, force_index_by_date=1;
|
||||
|
||||
SELECT * FROM foo_merge WHERE Val = 3 AND Id = 3;
|
||||
SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 AND t2.X == 4 GROUP BY X;
|
||||
|
@ -0,0 +1,21 @@
|
||||
1 [100,200] ['aa','bb'] [1,2]
|
||||
0 [0,1] ['aa','bb'] [0,0]
|
||||
1 [100,200] ['aa','bb'] [1,2]
|
||||
2 [100,200,300] ['a','b','c'] [10,20,30]
|
||||
3 [3,4] ['aa','bb'] [3,6]
|
||||
4 [4,5] ['aa','bb'] [4,8]
|
||||
0 [0,1] ['aa','bb'] [0,0]
|
||||
1 [100,200] ['aa','bb'] [1,2]
|
||||
2 [100,200,300] ['a','b','c'] [100,200,300]
|
||||
3 [3,4] ['aa','bb'] [3,6]
|
||||
4 [4,5] ['aa','bb'] [4,8]
|
||||
0 [0,1] ['aa','bb'] [0,0]
|
||||
1 [100,200] ['aa','bb'] [1,2]
|
||||
2 [100,200,300] ['a','b','c'] [100,200,300]
|
||||
3 [68,72] ['aa','bb'] [68,72]
|
||||
4 [4,5] ['aa','bb'] [4,8]
|
||||
0 0 aa 0
|
||||
1 1 bb 2
|
||||
2 2 aa 4
|
||||
3 3 aa 6
|
||||
4 4 aa 8
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user