From defed923313e2cf8c33d3b0890d6a2b86e563c45 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 12 Mar 2024 11:38:27 +0000 Subject: [PATCH 01/53] do nothing in `waitForOutdatedPartsToBeLoaded()` if loading is not required --- src/Storages/MergeTree/MergeTreeData.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d56cf761cf4..85389828e57 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1979,6 +1979,15 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ if (isStaticStorage()) return; + /// If waiting is not required, do NOT log and do NOT enable/disable turbo mode to make `waitForOutdatedPartsToBeLoaded` a lightweight check + { + std::unique_lock lock(outdated_data_parts_mutex); + if (outdated_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of outdated data parts was already canceled"); + if (outdated_data_parts_loading_finished) + return; + } + /// We need to load parts as fast as possible getOutdatedPartsLoadingThreadPool().enableTurboMode(); SCOPE_EXIT({ From 82c171b748c8f3de04369eb04769bb5ed5ef554b Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Fri, 22 Mar 2024 11:30:15 +0000 Subject: [PATCH 02/53] add ranking functions + make the supported table more obvious --- .../sql-reference/window-functions/index.md | 176 ++++++++++++++---- 1 file changed, 142 insertions(+), 34 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 9b2ded7b6ce..2f44c36acb4 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -12,25 +12,23 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Support or workaround | +| Feature | Supported? | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | -| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | supported | -| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported | -| `ROWS` frame | supported | -| `RANGE` frame | supported, the default | -| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead (`RANGE` works with any numeric type). | -| `GROUPS` frame | not supported | -| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported | -| `rank()`, `dense_rank()`, `row_number()` | supported | -| `lag/lead(value, offset)` | Not supported. Workarounds: | -| | 1) replace with `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead` | -| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | Supported. 
Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ |
| expressions involving window functions, e.g. `(count(*) over ()) / 2` | ✅ |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ |
| `ROWS` frame | ✅ |
| `RANGE` frame | ✅ (the default) |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead; `RANGE` works with any numeric type) |
| `GROUPS` frame | ❌ |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (all aggregate functions are supported) |
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
| `lag/lead(value, offset)` | ❌ <br/>
You can use one of the following workarounds:
1) use `any(value) over (.... rows between preceding and preceding)` to emulate `lag`, replacing `preceding` with `following` to emulate `lead`<br/>
2) use `lagInFrame/leadInFrame`, which are analogous but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | ✅ <br/>
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | ## ClickHouse-specific Window Functions -There are also the following window function that's specific to ClickHouse: +There is also the following ClickHouse specific window function: ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS]) @@ -89,6 +87,62 @@ These functions can be used only as a window function. Let's have a look at some examples of how window functions can be used. +### Numbering rows + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('Port Elizabeth Barbarians', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary) AS row +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─row─┐ +│ Michael Stanley │ 150000 │ 1 │ +│ Scott Harrison │ 150000 │ 2 │ +│ Charles Juarez │ 190000 │ 3 │ +│ Gary Chen │ 195000 │ 4 │ +│ Robert George │ 195000 │ 5 │ +└─────────────────┴────────┴─────┘ +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary) AS row, + rank() OVER (ORDER BY salary) AS rank, + dense_rank() OVER (ORDER BY salary) AS denseRank +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─row─┬─rank─┬─denseRank─┐ +│ Michael Stanley │ 150000 │ 1 │ 1 │ 1 │ +│ Scott Harrison │ 150000 │ 2 │ 1 │ 1 │ +│ Charles Juarez │ 190000 │ 3 │ 3 │ 2 │ +│ Gary Chen │ 195000 │ 4 │ 4 │ 3 │ +│ Robert George │ 195000 │ 5 │ 4 │ 3 │ +└─────────────────┴────────┴─────┴──────┴───────────┘ +``` + +### Partitioning by column + ```sql CREATE TABLE wf_partition ( @@ -120,6 +174,8 @@ ORDER BY └──────────┴───────┴───────┴──────────────┘ ``` +### Frame bounding + ```sql CREATE TABLE wf_frame ( @@ -131,14 +187,19 @@ ENGINE = Memory; INSERT INTO wf_frame FORMAT Values (1,1,1), (1,2,2), (1,3,3), (1,4,4), (1,5,5); +``` --- frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +```sql +-- Frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -151,7 +212,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4,5] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- short form - no bound expression, no order by SELECT part_key, @@ -169,14 +232,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4,5] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- frame is bounded by the beggining of a partition and the current row +```sql +-- frame is bounded by the beginning of a partition and the current row SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -189,8 +257,10 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- short form (frame is bounded by the beggining of a partition and the current row) +```sql +-- short form (frame is bounded by the beginning of a partition and the current row) SELECT part_key, value, @@ -207,8 +277,10 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- frame is bounded by the beggining of a partition and the current row, but order is backward +```sql +-- frame is bounded by the beginning of a partition and the current row, but order is backward SELECT part_key, value, @@ -225,14 +297,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [5,4] │ │ 1 │ 5 │ 5 │ [5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- sliding frame - 1 PRECEDING ROW AND CURRENT ROW SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN 1 PRECEDING AND CURRENT ROW) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -245,14 +322,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [3,4] │ │ 1 │ 5 │ 5 │ [4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- sliding frame - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -264,7 +346,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [3,4,5] │ │ 1 │ 5 │ 5 │ [4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- row_number does not respect the frame, so rn_1 = rn_2 = rn_3 != rn_4 SELECT part_key, @@ -278,8 +362,11 @@ SELECT FROM wf_frame WINDOW w1 AS (PARTITION BY part_key ORDER BY order DESC), - w2 AS (PARTITION BY part_key ORDER BY order DESC - Rows BETWEEN 1 PRECEDING AND CURRENT ROW) + w2 AS ( + PARTITION BY part_key + ORDER BY order DESC + Rows BETWEEN 1 PRECEDING AND CURRENT ROW + ) ORDER BY part_key ASC, value ASC; @@ -290,7 +377,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [5,4] │ 2 │ 2 │ 2 │ 2 │ │ 1 │ 5 │ 5 │ [5] │ 1 │ 1 │ 1 │ 1 │ └──────────┴───────┴───────┴──────────────┴──────┴──────┴──────┴──────┘ +``` +```sql -- first_value and last_value respect the frame SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -313,7 +402,9 @@ ORDER BY │ [1,2,3,4] │ 1 │ 4 │ [3,4] │ 3 │ 4 │ │ [1,2,3,4,5] │ 1 │ 5 │ [4,5] │ 4 │ 5 │ └────────────────┴───────────────┴──────────────┴────────────────┴───────────────┴──────────────┘ +``` +```sql -- second value within the frame SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -330,7 +421,9 @@ ORDER BY │ [1,2,3,4] │ 2 │ │ [2,3,4,5] │ 3 │ └────────────────┴──────────────┘ +``` +```sql -- second value within the frame + Null for missing values SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -351,6 +444,8 @@ ORDER BY ## Real world examples +The following examples solve common real-world problems. + ### Maximum/total salary per department. 
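The key construct in this example is a named `WINDOW` clause that several window functions share. A minimal sketch of the pattern, using the same `employees` table that the full example creates below:

```sql
-- One window definition, reused by two aggregates over the whole department partition.
SELECT
    department,
    max(salary) OVER wndw AS max_salary_per_dep,
    sum(salary) OVER wndw AS total_salary_per_dep
FROM employees
WINDOW wndw AS (
    PARTITION BY department
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
);
```

The full example builds on this to compare each employee against those per-department values.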
```sql @@ -369,7 +464,9 @@ INSERT INTO employees FORMAT Values ('IT', 'Tim', 200), ('IT', 'Anna', 300), ('IT', 'Elen', 500); +``` +```sql SELECT department, employee_name AS emp, @@ -386,8 +483,10 @@ FROM max(salary) OVER wndw AS max_salary_per_dep, sum(salary) OVER wndw AS total_salary_per_dep FROM employees - WINDOW wndw AS (PARTITION BY department - rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) + WINDOW wndw AS ( + PARTITION BY department + rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) ORDER BY department ASC, employee_name ASC @@ -421,7 +520,9 @@ INSERT INTO warehouse VALUES ('sku1', '2020-01-01', 1), ('sku1', '2020-02-01', 1), ('sku1', '2020-03-01', 1); +``` +```sql SELECT item, ts, @@ -461,13 +562,18 @@ insert into sensors values('cpu_temp', '2020-01-01 00:00:00', 87), ('cpu_temp', '2020-01-01 00:00:05', 87), ('cpu_temp', '2020-01-01 00:00:06', 87), ('cpu_temp', '2020-01-01 00:00:07', 87); +``` + +```sql SELECT metric, ts, value, - avg(value) OVER - (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN 2 PRECEDING AND CURRENT ROW) - AS moving_avg_temp + avg(value) OVER ( + PARTITION BY metric + ORDER BY ts ASC + Rows BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS moving_avg_temp FROM sensors ORDER BY metric ASC, @@ -536,7 +642,9 @@ insert into sensors values('ambient_temp', '2020-01-01 00:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16); +``` +```sql SELECT metric, ts, From 2df818866797c23fc38063663441280059fad565 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Fri, 22 Mar 2024 11:54:04 +0000 Subject: [PATCH 03/53] Agg functions --- .../sql-reference/window-functions/index.md | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 2f44c36acb4..19821781d0e 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -101,9 +101,9 @@ Engine = Memory; INSERT INTO salaries FORMAT Values ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), - ('Port Elizabeth Barbarians', 'Charles Juarez', 190000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), - ('Port Elizabeth Barbarians', 'Scott Harrison', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); ``` @@ -141,6 +141,46 @@ FROM salaries; └─────────────────┴────────┴─────┴──────┴───────────┘ ``` +### Aggregation functions + +Compare each player's salary to the average for their team. + +```sql +SELECT player, salary, team, + avg(salary) OVER (PARTITION BY team) AS teamAvg, + salary - teamAvg AS diff +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐ +│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 170000 │ 20000 │ +│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 170000 │ -20000 │ +│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │ +│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 180000 │ -30000 │ +│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │ +└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘ +``` + +Compare each player's salary to the maximum for their team. 
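For contrast with ordinary aggregation: a plain `GROUP BY` collapses each team to a single row, so comparing an individual salary with the team-wide value would require joining the aggregate back to `salaries`; the window function instead keeps one output row per player and repeats the per-team value on each of them. A sketch of the `GROUP BY` counterpart:

```sql
-- Plain aggregation: one row per team, the individual players are gone.
SELECT team, max(salary) AS team_max
FROM salaries
GROUP BY team;
```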
+ +```sql +SELECT player, salary, team, + max(salary) OVER (PARTITION BY team) AS teamAvg, + salary - teamAvg AS diff +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐ +│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 190000 │ 0 │ +│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 190000 │ -40000 │ +│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │ +│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 195000 │ -45000 │ +│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │ +└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘ +``` + ### Partitioning by column ```sql @@ -446,7 +486,7 @@ ORDER BY The following examples solve common real-world problems. -### Maximum/total salary per department. +### Maximum/total salary per department ```sql CREATE TABLE employees @@ -502,7 +542,7 @@ FROM └────────────┴──────┴────────┴────────────────────┴──────────────────────┴──────────────────┘ ``` -### Cumulative sum. +### Cumulative sum ```sql CREATE TABLE warehouse From a85886c2e0b1d2997b1b5192fe7a489181668041 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Fri, 22 Mar 2024 16:26:43 +0000 Subject: [PATCH 04/53] AggregatingMergeTree: Split table creation and MV definition + add more to example --- .../mergetree-family/aggregatingmergetree.md | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 62191d9b5e4..7a449f400fd 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -68,6 +68,12 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im ## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view} +The following examples assumes that you have a database named `test` so make sure you create that if it doesn't already exist: + +```sql +CREATE DATABASE test; +``` + We will create the table `test.visits` that contain the raw data: ``` sql @@ -80,17 +86,24 @@ CREATE TABLE test.visits ) ENGINE = MergeTree ORDER BY (StartDate, CounterID); ``` +Next, we need to create an `AggregatingMergeTree` table that will store `AggregationFunction`s that keep track of the total number of visits and the number of unique users. + `AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type: ``` sql -CREATE MATERIALIZED VIEW test.mv_visits -( +CREATE TABLE test.agg_visits ( StartDate DateTime64 NOT NULL, CounterID UInt64, Visits AggregateFunction(sum, Nullable(Int32)), Users AggregateFunction(uniq, Nullable(Int32)) ) -ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID) +ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID); +``` + +And then let's create a materialized view that populates `test.agg_visits` from `test.visits` : + +```sql +CREATE MATERIALIZED VIEW test.visits_mv TO test.agg_visits AS SELECT StartDate, CounterID, @@ -104,25 +117,45 @@ Inserting data into the `test.visits` table. 
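Note that `StartDate` is a `DateTime64` (scale 3 by default), so the integer literals below are epoch timestamps in milliseconds. An illustrative query to check what such a literal decodes to:

```sql
-- 1667446031000 ms since the Unix epoch is 2022-11-03 03:27:11.000 (UTC)
SELECT toDateTime64(1667446031000 / 1000, 3, 'UTC');
```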
``` sql INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) - VALUES (1667446031, 1, 3, 4) -INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) - VALUES (1667446031, 1, 6, 3) + VALUES (1667446031000, 1, 3, 4), (1667446031000, 1, 6, 3); ``` -The data is inserted in both the table and the materialized view `test.mv_visits`. +The data is inserted in both `test.visits` and `test.agg_visits`. To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: -``` sql +```sql SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users -FROM test.mv_visits +FROM test.agg_visits GROUP BY StartDate ORDER BY StartDate; ``` +```text +┌───────────────StartDate─┬─Visits─┬─Users─┐ +│ 2022-11-03 03:27:11.000 │ 9 │ 2 │ +└─────────────────────────┴────────┴───────┘ +``` + +And how about if we add another couple of records to `test.visits`, but this time we'll use a different timestamp for one of the records: + +```sql +INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1669446031000, 2, 5, 10), (1667446031000, 3, 7, 5); +``` + +If we then run the `SELECT` query again, we'll see the following output: + +```text +┌───────────────StartDate─┬─Visits─┬─Users─┐ +│ 2022-11-03 03:27:11.000 │ 16 │ 3 │ +│ 2022-11-26 07:00:31.000 │ 5 │ 1 │ +└─────────────────────────┴────────┴───────┘ +``` + ## Related Content - Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) From 071a8ff95f5656ab18433dc03b19fce12e5855ab Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 25 Mar 2024 18:55:46 +0100 Subject: [PATCH 05/53] less unformatted exceptions --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 34 +++++------ .../MergeTree/MergeTreeDataMergerMutator.h | 10 ++-- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 41 +++++++------ .../MergeTree/ReplicatedMergeTreeQueue.h | 8 +-- src/Storages/StorageMergeTree.cpp | 58 +++++++++---------- src/Storages/StorageMergeTree.h | 6 +- src/Storages/StorageReplicatedMergeTree.cpp | 20 +++---- 9 files changed, 91 insertions(+), 94 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9f3b48f88c..7a2ddc77724 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7773,11 +7773,11 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & return result; } -bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason) +bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, PreformattedMessage & out_reason) { if (left->getProjectionParts().size() != right->getProjectionParts().size()) { - out_reason = fmt::format( + out_reason = PreformattedMessage::create( "Parts have different number of projections: {} in part '{}' and {} in part '{}'", left->getProjectionParts().size(), left->name, @@ -7791,7 +7791,7 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const { if (!right->hasProjection(name)) { - out_reason = fmt::format( + out_reason = PreformattedMessage::create( "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name ); return false; diff --git 
a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8305c7c6ce9..9081d384a26 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -418,7 +418,7 @@ public: static ReservationPtr tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); static ReservationPtr reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); - static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason); + static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, PreformattedMessage & out_reason); StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 53d49b51e8f..2d49e1df19b 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -136,7 +136,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( const AllowedMergingPredicate & can_merge_callback, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, const PartitionIdsHint * partitions_hint) { MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn, partitions_hint); @@ -145,7 +145,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (data_parts.empty()) { - out_disable_reason = "There are no parts in the table"; + out_disable_reason = PreformattedMessage::create("There are no parts in the table"); return SelectPartsDecision::CANNOT_SELECT; } @@ -153,7 +153,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (info.parts_selected_precondition == 0) { - out_disable_reason = "No parts satisfy preconditions for merge"; + out_disable_reason = PreformattedMessage::create("No parts satisfy preconditions for merge"); return SelectPartsDecision::CANNOT_SELECT; } @@ -177,9 +177,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /*optimize_skip_merged_partitions=*/true); } - if (!out_disable_reason.empty()) - out_disable_reason += ". "; - out_disable_reason += "There is no need to merge parts according to merge selector algorithm"; + if (!out_disable_reason.text.empty()) + out_disable_reason.text += ". "; + out_disable_reason.text += "There is no need to merge parts according to merge selector algorithm"; return SelectPartsDecision::CANNOT_SELECT; } @@ -196,7 +196,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart auto metadata_snapshot = data.getInMemoryMetadataPtr(); - String out_reason; + PreformattedMessage out_reason; MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn, out_reason); if (info.parts_selected_precondition == 0) @@ -223,7 +223,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart for (size_t i = 0; i < all_partition_ids.size(); ++i) { auto future_part = std::make_shared(); - String out_disable_reason; + PreformattedMessage out_disable_reason; /// This method should have been const, but something went wrong... 
it's const with dry_run = true auto status = const_cast(this)->selectPartsToMergeFromRanges( future_part, /*aggressive*/ false, max_total_size_to_merge, merge_with_ttl_allowed, @@ -232,7 +232,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart if (status == SelectPartsDecision::SELECTED) res.insert(all_partition_ids[i]); else - LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason); + LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason.text); } String best_partition_id_to_optimize = getBestPartitionToOptimizeEntire(info.partitions_info); @@ -331,7 +331,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String & out_disable_reason) const + PreformattedMessage & out_disable_reason) const { MergeSelectingInfo res; @@ -444,7 +444,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool dry_run) { const auto data_settings = data.getSettings(); @@ -515,7 +515,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( if (parts_to_merge.empty()) { - out_disable_reason = "Did not find any parts to merge (with usual merge selectors)"; + out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors)"); return SelectPartsDecision::CANNOT_SELECT; } } @@ -573,20 +573,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions) { MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id); if (parts.empty()) { - out_disable_reason = "There are no parts inside partition"; + out_disable_reason = PreformattedMessage::create("There are no parts inside partition"); return SelectPartsDecision::CANNOT_SELECT; } if (!final && parts.size() == 1) { - out_disable_reason = "There is only one part inside partition"; + out_disable_reason = PreformattedMessage::create("There is only one part inside partition"); return SelectPartsDecision::CANNOT_SELECT; } @@ -595,7 +595,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 && (!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot))) { - out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions"; + out_disable_reason = PreformattedMessage::create("Partition skipped due to optimize_skip_merged_partitions"); return SelectPartsDecision::NOTHING_TO_MERGE; } @@ -636,7 +636,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti static_cast((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100)); } - out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space)); + out_disable_reason = PreformattedMessage::create("Insufficient available disk space, required {}", 
ReadableSize(required_disk_space)); return SelectPartsDecision::CANNOT_SELECT; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 669ee040af3..aad34bfb914 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -43,7 +43,7 @@ public: using AllowedMergingPredicate = std::function; + PreformattedMessage &)>; explicit MergeTreeDataMergerMutator(MergeTreeData & data_); @@ -92,7 +92,7 @@ public: const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String & out_disable_reason) const; + PreformattedMessage & out_disable_reason) const; /// The third step of selecting parts to merge: takes ranges that we can merge, and selects parts that we want to merge SelectPartsDecision selectPartsToMergeFromRanges( @@ -103,7 +103,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool dry_run = false); String getBestPartitionToOptimizeEntire(const PartitionsInfo & partitions_info) const; @@ -129,7 +129,7 @@ public: const AllowedMergingPredicate & can_merge, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, const PartitionIdsHint * partitions_hint = nullptr); /** Select all the parts in the specified partition for merge, if possible. @@ -144,7 +144,7 @@ public: bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions = false); /** Creates a task to merge parts. 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 42f564f40da..d7168ff57be 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2266,7 +2266,7 @@ bool BaseMergePredicate::operator()( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction *, - String & out_reason) const + PreformattedMessage & out_reason) const { if (left) return canMergeTwoParts(left, right, out_reason); @@ -2278,7 +2278,7 @@ template bool BaseMergePredicate::canMergeTwoParts( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String & out_reason) const + PreformattedMessage & out_reason) const { /// A sketch of a proof of why this method actually works: /// @@ -2322,19 +2322,19 @@ bool BaseMergePredicate::canMergeTwoParts( { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; + out_reason = PreformattedMessage::create("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - out_reason = "Quorum insert for part " + part->name + " is currently in progress"; + out_reason = PreformattedMessage::create("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; + out_reason = PreformattedMessage::create("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } } @@ -2348,7 +2348,7 @@ bool BaseMergePredicate::canMergeTwoParts( { if (partition_ids_hint && !partition_ids_hint->contains(left->info.partition_id)) { - out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); + out_reason = PreformattedMessage::create("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); return false; } @@ -2360,8 +2360,7 @@ bool BaseMergePredicate::canMergeTwoParts( auto block_it = block_numbers.upper_bound(left_max_block); if (block_it != block_numbers.end() && *block_it < right_min_block) { - out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts " - + left->name + " and " + right->name; + out_reason = PreformattedMessage::create("Block number {} is still being inserted between parts {} and {}", *block_it, left->name, right->name); return false; } } @@ -2380,7 +2379,7 @@ bool BaseMergePredicate::canMergeTwoParts( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; + out_reason = PreformattedMessage::create("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2397,9 +2396,9 @@ bool BaseMergePredicate::canMergeTwoParts( Strings covered = virtual_parts_->getPartsCoveredBy(gap_part_info); if (!covered.empty()) { - out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() - + " to " + covered.back() + ") that are still not present or being processed by " - + " other 
background process on this replica between " + left->name + " and " + right->name; + out_reason = PreformattedMessage::create("There are {} parts (from {} to {}) " + "that are still not present or being processed by other background process " + "on this replica between {} and {}", covered.size(), covered.front(), covered.back(), left->name, right->name); return false; } } @@ -2415,8 +2414,8 @@ bool BaseMergePredicate::canMergeTwoParts( if (left_mutation_ver != right_mutation_ver) { - out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " - + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; + out_reason = PreformattedMessage::create("Current mutation versions of parts {} and {} differ: " + "{} and {} respectively", left->name, right->name, left_mutation_ver, right_mutation_ver); return false; } } @@ -2427,23 +2426,23 @@ bool BaseMergePredicate::canMergeTwoParts( template bool BaseMergePredicate::canMergeSinglePart( const MergeTreeData::DataPartPtr & part, - String & out_reason) const + PreformattedMessage & out_reason) const { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); + out_reason = PreformattedMessage::create("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); + out_reason = PreformattedMessage::create("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); + out_reason = PreformattedMessage::create("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } @@ -2458,7 +2457,7 @@ bool BaseMergePredicate::canMergeSinglePart( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); + out_reason = PreformattedMessage::create("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2467,7 +2466,7 @@ bool BaseMergePredicate::canMergeSinglePart( } -bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const +bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const { std::lock_guard lock(queue.state_mutex); for (const auto & entry : queue.queue) @@ -2480,7 +2479,7 @@ bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const Mer if (part->info.isDisjoint(MergeTreePartInfo::fromPartName(part_name, queue.format_version))) continue; - out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); + out_reason = PreformattedMessage::create("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); return true; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h 
b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index b17e7819946..85f3aacc766 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -505,19 +505,19 @@ public: bool operator()(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction * txn, - String & out_reason) const; + PreformattedMessage & out_reason) const; /// Can we assign a merge with these two parts? /// (assuming that no merge was assigned after the predicate was constructed) /// If we can't and out_reason is not nullptr, set it to the reason why we can't merge. bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String & out_reason) const; + PreformattedMessage & out_reason) const; /// Can we assign a merge this part and some other part? /// For example a merge of a part and itself is needed for TTL. /// This predicate is checked for the first part of each range. - bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const; + bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const; CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_); @@ -561,7 +561,7 @@ public: /// Returns true if part is needed for some REPLACE_RANGE entry. /// We should not drop part in this case, because replication queue may stuck without that part. - bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const; + bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const; /// Return nonempty optional of desired mutation version and alter version. 
/// If we have no alter (modify/drop) mutations in mutations queue, than we return biggest possible diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c87681a1418..c41943b269d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -933,7 +933,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( bool aggressive, const String & partition_id, bool final, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, TableLockHolder & /* table_lock_holder */, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -951,7 +951,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( CurrentlyMergingPartsTaggerPtr merging_tagger; MergeList::EntryPtr merge_entry; - auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String & disable_reason) -> bool + auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, PreformattedMessage & disable_reason) -> bool { if (tx) { @@ -960,7 +960,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && !left->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)) || (right && !right->version.isVisible(tx->getSnapshot(), Tx::EmptyTID))) { - disable_reason = "Some part is not visible in transaction"; + disable_reason = PreformattedMessage::create("Some part is not visible in transaction"); return false; } @@ -968,7 +968,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && left->version.isRemovalTIDLocked()) || (right && right->version.isRemovalTIDLocked())) { - disable_reason = "Some part is locked for removal in another cuncurrent transaction"; + disable_reason = PreformattedMessage::create("Some part is locked for removal in another cuncurrent transaction"); return false; } } @@ -979,7 +979,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( { if (currently_merging_mutating_parts.contains(right)) { - disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = PreformattedMessage::create("Some part currently in a merging or mutating process"); return false; } else @@ -988,13 +988,13 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (currently_merging_mutating_parts.contains(left) || currently_merging_mutating_parts.contains(right)) { - disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = PreformattedMessage::create("Some part currently in a merging or mutating process"); return false; } if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock)) { - disable_reason = "Some parts have different mutation version"; + disable_reason = PreformattedMessage::create("Some parts have different mutation version"); return false; } @@ -1004,7 +1004,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( auto max_possible_level = getMaxLevelInBetween(left, right); if (max_possible_level > std::max(left->info.level, right->info.level)) { - disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level); + disable_reason = PreformattedMessage::create("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, 
right->name, max_possible_level); return false; } @@ -1013,11 +1013,11 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - auto is_background_memory_usage_ok = [](String & disable_reason) -> bool + auto is_background_memory_usage_ok = [](PreformattedMessage & disable_reason) -> bool { if (canEnqueueBackgroundTask()) return true; - disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})", + disable_reason = PreformattedMessage::create("Current background tasks memory usage ({}) is more than the limit ({})", formatReadableSizeWithBinarySuffix(background_memory_tracker.get()), formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit())); return false; @@ -1045,7 +1045,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( out_disable_reason); } else - out_disable_reason = "Current value of max_source_parts_size is zero"; + out_disable_reason = PreformattedMessage::create("Current value of max_source_parts_size is zero"); } } else @@ -1086,7 +1086,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (std::cv_status::timeout == currently_processing_in_background_condition.wait_for(lock, timeout)) { - out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); + out_disable_reason = PreformattedMessage::create("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); break; } } @@ -1102,9 +1102,9 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (select_decision != SelectPartsDecision::SELECTED) { - if (!out_disable_reason.empty()) - out_disable_reason += ". "; - out_disable_reason += "Cannot select parts for optimization"; + if (!out_disable_reason.text.empty()) + out_disable_reason.text += ". 
"; + out_disable_reason.text += "Cannot select parts for optimization"; return {}; } @@ -1125,7 +1125,7 @@ bool StorageMergeTree::merge( const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions) { auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1180,7 +1180,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p } MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String & /* disable_reason */, TableLockHolder & /* table_lock_holder */, + const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & /* disable_reason */, TableLockHolder & /* table_lock_holder */, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) { if (current_mutations_by_version.empty()) @@ -1396,7 +1396,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merger_mutator.merges_blocker.isCancelled()) return false; - String out_reason; + PreformattedMessage out_reason; merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, out_reason, shared_lock, lock, txn); if (!merge_entry && !current_mutations_by_version.empty()) @@ -1559,14 +1559,12 @@ bool StorageMergeTree::optimize( auto txn = local_context->getCurrentTransaction(); - String disable_reason; + PreformattedMessage disable_reason; if (!partition && final) { if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) { - constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}"; - disable_reason = "only ReplacingMergeTree can be CLEANUP"; - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, "Cannot OPTIMIZE with CLEANUP table: only ReplacingMergeTree can be CLEANUP"); } if (cleanup && !getSettings()->allow_experimental_replacing_merge_with_cleanup) @@ -1592,12 +1590,12 @@ bool StorageMergeTree::optimize( local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; - if (disable_reason.empty()) - disable_reason = "unknown reason"; - LOG_INFO(log, message, disable_reason); + if (disable_reason.text.empty()) + disable_reason = PreformattedMessage::create("unknown reason"); + LOG_INFO(log, message, disable_reason.text); if (local_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason.text); return false; } } @@ -1620,12 +1618,12 @@ bool StorageMergeTree::optimize( local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; - if (disable_reason.empty()) - disable_reason = "unknown reason"; - LOG_INFO(log, message, disable_reason); + if (disable_reason.text.empty()) + disable_reason = PreformattedMessage::create("unknown reason"); + LOG_INFO(log, message, disable_reason.text); if (local_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason.text); return false; } } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 
c384a391291..02217e6d138 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -175,7 +175,7 @@ private: const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions = false); void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction); @@ -202,7 +202,7 @@ private: bool aggressive, const String & partition_id, bool final, - String & disable_reason, + PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -211,7 +211,7 @@ private: MergeMutateSelectedEntryPtr selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String & disable_reason, + const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); /// For current mutations queue, returns maximum version of mutation for a part, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c41403e312b..2feaca6ba48 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3791,7 +3791,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in)); } - String out_reason; + PreformattedMessage out_reason; if (can_assign_merge && merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred, merge_with_ttl_allowed, NO_TRANSACTION_PTR, out_reason, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) @@ -5773,7 +5773,7 @@ bool StorageReplicatedMergeTree::optimize( future_merged_part->uuid = UUIDHelpers::generateV4(); constexpr const char * unknown_disable_reason = "unknown reason"; - String disable_reason = unknown_disable_reason; + PreformattedMessage disable_reason = PreformattedMessage::create(unknown_disable_reason); SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; if (partition_id.empty()) @@ -5796,10 +5796,10 @@ bool StorageReplicatedMergeTree::optimize( if (select_decision != SelectPartsDecision::SELECTED) { constexpr const char * message_fmt = "Cannot select parts for optimization: {}"; - assert(disable_reason != unknown_disable_reason); + assert(disable_reason.text != unknown_disable_reason); if (!partition_id.empty()) - disable_reason += fmt::format(" (in partition {})", partition_id); - return handle_noop(message_fmt, disable_reason); + disable_reason.text += fmt::format(" (in partition {})", partition_id); + return handle_noop(message_fmt, disable_reason.text); } ReplicatedMergeTreeLogEntryData merge_entry; @@ -8465,9 +8465,9 @@ void StorageReplicatedMergeTree::movePartitionToShard( } /// canMergeSinglePart is overlapping with dropPart, let's try to use the same code. - String out_reason; + PreformattedMessage out_reason; if (!merge_pred.canMergeSinglePart(part, out_reason)) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason.text); } { @@ -8725,18 +8725,18 @@ bool StorageReplicatedMergeTree::dropPartImpl( /// There isn't a lot we can do otherwise. 
Can't cancel merges because it is possible that a replica already /// finished the merge. - String out_reason; + PreformattedMessage out_reason; if (!merge_pred.canMergeSinglePart(part, out_reason)) { if (throw_if_noop) - throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } if (merge_pred.partParticipatesInReplaceRange(part, out_reason)) { if (throw_if_noop) - throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } From f8cf85edda3e47ad2b9435f096914e1ecb7b3645 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Tue, 2 Apr 2024 17:48:28 +0100 Subject: [PATCH 06/53] Bump From e7e20acc5b13d4a84754d42a38356f3b009531c0 Mon Sep 17 00:00:00 2001 From: justindeguzman Date: Tue, 2 Apr 2024 10:22:57 -0700 Subject: [PATCH 07/53] Bump From d9048766933a11b283e0e2345d6bc6c9d0a57699 Mon Sep 17 00:00:00 2001 From: justindeguzman Date: Tue, 2 Apr 2024 10:23:50 -0700 Subject: [PATCH 08/53] Bump From 606058c1ca489f8fcc77ade96d5d1e39573a0628 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:29:07 +0000 Subject: [PATCH 09/53] Consolidate SQL compat alias docs into data type docs + improve sidebar order --- .../data-types/aggregatefunction.md | 2 +- docs/en/sql-reference/data-types/array.md | 2 +- docs/en/sql-reference/data-types/boolean.md | 2 +- docs/en/sql-reference/data-types/date.md | 2 +- docs/en/sql-reference/data-types/date32.md | 2 +- docs/en/sql-reference/data-types/datetime.md | 2 +- .../en/sql-reference/data-types/datetime64.md | 2 +- docs/en/sql-reference/data-types/decimal.md | 2 +- docs/en/sql-reference/data-types/enum.md | 2 +- .../sql-reference/data-types/fixedstring.md | 4 +-- docs/en/sql-reference/data-types/float.md | 2 +- docs/en/sql-reference/data-types/geo.md | 4 +-- docs/en/sql-reference/data-types/index.md | 4 +-- docs/en/sql-reference/data-types/int-uint.md | 2 +- docs/en/sql-reference/data-types/ipv4.md | 2 +- docs/en/sql-reference/data-types/ipv6.md | 2 +- docs/en/sql-reference/data-types/json.md | 2 +- .../data-types/lowcardinality.md | 6 ++-- docs/en/sql-reference/data-types/map.md | 8 +++--- .../data-types/multiword-types.md | 27 ------------------ docs/en/sql-reference/data-types/nullable.md | 4 +-- .../data-types/simpleaggregatefunction.md | 2 ++ docs/en/sql-reference/data-types/string.md | 4 +-- docs/en/sql-reference/data-types/tuple.md | 4 +-- docs/en/sql-reference/data-types/uuid.md | 2 +- docs/en/sql-reference/data-types/variant.md | 6 ++-- .../data-types/multiword-types.md | 28 ------------------- .../data-types/multiword-types.mdx | 10 ------- 28 files changed, 39 insertions(+), 102 deletions(-) delete mode 100644 docs/en/sql-reference/data-types/multiword-types.md delete mode 100644 docs/ru/sql-reference/data-types/multiword-types.md delete mode 100644 docs/zh/sql-reference/data-types/multiword-types.mdx diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index fe6d7ebe0dc..87511a505dc 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/aggregatefunction -sidebar_position: 53 +sidebar_position: 46 sidebar_label: AggregateFunction --- diff --git a/docs/en/sql-reference/data-types/array.md 
b/docs/en/sql-reference/data-types/array.md index 0ee7c8de93c..e5a8ce5d18b 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/array -sidebar_position: 52 +sidebar_position: 32 sidebar_label: Array(T) --- diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index 70abf767a41..4c59bd947de 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/boolean -sidebar_position: 43 +sidebar_position: 22 sidebar_label: Boolean --- diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 26e4610aec7..7adee3bbf3c 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/date -sidebar_position: 47 +sidebar_position: 12 sidebar_label: Date --- diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 38a07cd817d..a08c931b7fc 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/date32 -sidebar_position: 48 +sidebar_position: 14 sidebar_label: Date32 --- diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1adff18f598..889bc682d91 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/datetime -sidebar_position: 48 +sidebar_position: 16 sidebar_label: DateTime --- diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 504d0e2b0a6..ef452a723e6 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/datetime64 -sidebar_position: 49 +sidebar_position: 18 sidebar_label: DateTime64 --- diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index 2b32e72a28f..dfdefdff5a5 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/decimal -sidebar_position: 42 +sidebar_position: 6 sidebar_label: Decimal --- diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index 02e73a0360e..ccfeb7f3416 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/enum -sidebar_position: 50 +sidebar_position: 20 sidebar_label: Enum --- diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index a56b3fccbc1..0316df7fe34 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/fixedstring -sidebar_position: 45 +sidebar_position: 10 sidebar_label: FixedString(N) --- -# FixedString +# FixedString(N) A fixed-length string of `N` bytes (neither characters nor code points). 
diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index be7b2a7fcd8..23131d5b4fe 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/float -sidebar_position: 41 +sidebar_position: 4 sidebar_label: Float32, Float64 --- diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 1d37b829dd5..7e3c32b3451 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -1,8 +1,8 @@ --- slug: /en/sql-reference/data-types/geo -sidebar_position: 62 +sidebar_position: 54 sidebar_label: Geo -title: "Geo Data Types" +title: "Geometric" --- ClickHouse supports data types for representing geographical objects — locations, lands, etc. diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index ffd063590fa..fcb0b60d022 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/ sidebar_label: List of data types -sidebar_position: 37 +sidebar_position: 1 --- -# ClickHouse Data Types +# Data Types in ClickHouse ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any. diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index 520454a859f..52d2982de19 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/int-uint -sidebar_position: 40 +sidebar_position: 2 sidebar_label: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 --- diff --git a/docs/en/sql-reference/data-types/ipv4.md b/docs/en/sql-reference/data-types/ipv4.md index 288806f47b3..637ed543e08 100644 --- a/docs/en/sql-reference/data-types/ipv4.md +++ b/docs/en/sql-reference/data-types/ipv4.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/ipv4 -sidebar_position: 59 +sidebar_position: 28 sidebar_label: IPv4 --- diff --git a/docs/en/sql-reference/data-types/ipv6.md b/docs/en/sql-reference/data-types/ipv6.md index 97959308b58..642a7db81fc 100644 --- a/docs/en/sql-reference/data-types/ipv6.md +++ b/docs/en/sql-reference/data-types/ipv6.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/ipv6 -sidebar_position: 60 +sidebar_position: 30 sidebar_label: IPv6 --- diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index fd548a0d5a2..39e37abad82 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/json -sidebar_position: 54 +sidebar_position: 26 sidebar_label: JSON --- diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index db10103282d..133ac2bd72e 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/lowcardinality -sidebar_position: 51 -sidebar_label: LowCardinality +sidebar_position: 42 +sidebar_label: LowCardinality(T) --- -# LowCardinality +# LowCardinality(T) Changes the internal representation of other data types to be 
dictionary-encoded. diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index e0c8b98f9f8..2c734969afc 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/data-types/map -sidebar_position: 65 -sidebar_label: Map(key, value) +sidebar_position: 36 +sidebar_label: Map(K, V) --- -# Map(key, value) +# Map(K, V) -`Map(key, value)` data type stores `key:value` pairs. +`Map(K, V)` data type stores `key:value` pairs. **Parameters** diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md deleted file mode 100644 index ebbe1d84544..00000000000 --- a/docs/en/sql-reference/data-types/multiword-types.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -slug: /en/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Multiword Type Names -title: "Multiword Types" ---- - -When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility. - -## Multiword Types Support - -| Multiword types | Simple types | -|----------------------------------|--------------------------------------------------------------| -| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | -| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | -| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 5504765e4a0..abcb87a0c1b 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/data-types/nullable -sidebar_position: 55 -sidebar_label: Nullable +sidebar_position: 44 +sidebar_label: Nullable(T) --- # Nullable(T) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 517a28576f0..39f8409c1e1 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -1,5 +1,7 @@ --- slug: /en/sql-reference/data-types/simpleaggregatefunction +sidebar_position: 48 +sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction diff --git a/docs/en/sql-reference/data-types/string.md b/docs/en/sql-reference/data-types/string.md index f891a9303e5..8a4f346fdfc 100644 --- a/docs/en/sql-reference/data-types/string.md +++ b/docs/en/sql-reference/data-types/string.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/data-types/string -sidebar_position: 44 +sidebar_position: 8 sidebar_label: String --- @@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR Aliases: -- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`. +- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`, `CHAR LARGE OBJECT`, `CHAR VARYING`, `CHARACTER LARGE OBJECT`, `CHARACTER VARYING`, `NCHAR LARGE OBJECT`, `NCHAR VARYING`, `NATIONAL CHARACTER LARGE OBJECT`, `NATIONAL CHARACTER VARYING`, `NATIONAL CHAR VARYING`, `NATIONAL CHARACTER`, `NATIONAL CHAR`, `BINARY LARGE OBJECT`, `BINARY VARYING`, ## Encodings diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index 8f87eeca075..0525a3b0476 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/tuple -sidebar_position: 54 +sidebar_position: 34 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) +# Tuple(T1, T2, ...) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element. diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index 40f756b9588..75e163f5063 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/uuid -sidebar_position: 46 +sidebar_position: 24 sidebar_label: UUID --- diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7d10d4b0e97..1a9f1dde8d3 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/variant -sidebar_position: 55 -sidebar_label: Variant +sidebar_position: 40 +sidebar_label: Variant(T1, T2, ...) --- -# Variant(T1, T2, T3, ...) +# Variant(T1, T2, ...) This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). diff --git a/docs/ru/sql-reference/data-types/multiword-types.md b/docs/ru/sql-reference/data-types/multiword-types.md deleted file mode 100644 index cca2d71e480..00000000000 --- a/docs/ru/sql-reference/data-types/multiword-types.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -slug: /ru/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Составные типы ---- - -# Составные типы {#multiword-types} - -При создании таблиц вы можете использовать типы данных с названием, состоящим из нескольких слов. Такие названия поддерживаются для лучшей совместимости с SQL. 
- -## Поддержка составных типов {#multiword-types-support} - -| Составные типы | Обычные типы | -|-------------------------------------|-----------------------------------------------------------| -| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | -| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | -| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/zh/sql-reference/data-types/multiword-types.mdx b/docs/zh/sql-reference/data-types/multiword-types.mdx deleted file mode 100644 index 85431d47efd..00000000000 --- a/docs/zh/sql-reference/data-types/multiword-types.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -slug: /zh/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Multiword Type Names -title: "Multiword Types" ---- - -import Content from '@site/docs/en/sql-reference/data-types/multiword-types.md'; - - From c7a28b137ad9ca75c44bb531fc79ba034e3e311d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Apr 2024 07:24:20 +0000 Subject: [PATCH 10/53] Update version_date.tsv and changelogs after v24.3.2.23-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.3.2.23-lts.md | 29 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.3.2.23-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 3daa62cb212..346868e19c4 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index ace01ae9a9f..36f09c092f8 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index e92823b686a..531a50efe96 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i 
"s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v24.3.2.23-lts.md b/docs/changelogs/v24.3.2.23-lts.md new file mode 100644 index 00000000000..4d59a1cedf6 --- /dev/null +++ b/docs/changelogs/v24.3.2.23-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.2.23-lts (8b7d910960c) FIXME as compared to v24.3.1.2672-lts (2c5c589a882) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). +* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#62030](https://github.com/ClickHouse/ClickHouse/issues/62030):. [#61869](https://github.com/ClickHouse/ClickHouse/pull/61869) ([Nikita Fomichev](https://github.com/fm4v)). +* Backported in [#62057](https://github.com/ClickHouse/ClickHouse/issues/62057): ... [#62044](https://github.com/ClickHouse/ClickHouse/pull/62044) ([Max K.](https://github.com/maxknv)). +* Backported in [#62204](https://github.com/ClickHouse/ClickHouse/issues/62204):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix some crashes with analyzer and group_by_use_nulls. [#61933](https://github.com/ClickHouse/ClickHouse/pull/61933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix scalars create as select [#61998](https://github.com/ClickHouse/ClickHouse/pull/61998) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ignore IfChainToMultiIfPass if returned type changed. [#62059](https://github.com/ClickHouse/ClickHouse/pull/62059) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix type for ConvertInToEqualPass [#62066](https://github.com/ClickHouse/ClickHouse/pull/62066) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Revert output Pretty in tty [#62090](https://github.com/ClickHouse/ClickHouse/pull/62090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index ca1a23a99db..060a0107c1e 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.3.2.23-lts 2024-04-03 v24.3.1.2672-lts 2024-03-27 v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 From 8f40db2fb2c520a8907914f8f5799026c43ed3f2 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 3 Apr 2024 08:26:52 +0100 Subject: [PATCH 11/53]
missing closing /

---
 docs/en/sql-reference/window-functions/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 19821781d0e..32ebc6d028f 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -24,7 +24,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`)  | ✅ (All aggregate functions are supported)       |
 | `rank()`, `dense_rank()`, `row_number()`                                          | ✅                                               |
 | `lag/lead(value, offset)`                                                         | ❌ <br/> You can use one of the following workarounds: <br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
-| ntile(buckets) | ✅ <br> Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). |
+| ntile(buckets) | ✅ <br/> Specify the window as `(partition by x order by y rows between unbounded preceding and unbounded following)`. |

## ClickHouse-specific Window Functions

From 2f45d98c970740c2263812fe3044616787f49d96 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 3 Apr 2024 10:03:04 +0000
Subject: [PATCH 12/53] Docs: Improve wording of DROP TABLE docs

---
 docs/en/sql-reference/statements/drop.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index 159ab09ab94..833ff756449 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -20,11 +20,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
 
 ## DROP TABLE
 
-Deletes the table.
-In case when `IF EMPTY` clause is specified server will check if table is empty only on replica that received initial query.
+Deletes one or more tables.
 
 :::tip
-Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
+To undo the deletion of a table, please see see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
 :::
 
 Syntax:
 
 ``` sql
 DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC]
 ```
 
-Note that deleting multiple tables at the same time is a non-atomic deletion. If a table fails to be deleted, subsequent tables will not be deleted.
+Limitations:
+- If the clause `IF EMPTY` is specified, the server checks the emptiness of the table only on the replica which received the query.
+- Deleting multiple tables at once is not an atomic operation, i.e. if the deletion of a table fails, subsequent tables will not be deleted.
 
 ## DROP DICTIONARY

From 0782ccaa91fa4a850cea00f52a660ee818e8e3c8 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 3 Apr 2024 12:53:09 +0200
Subject: [PATCH 13/53] Update docs/en/sql-reference/statements/drop.md

Co-authored-by: Han Fei

---
 docs/en/sql-reference/statements/drop.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index 833ff756449..98b849ecf3b 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -23,7 +23,7 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
 Deletes one or more tables.
 :::tip
-To undo the deletion of a table, please see see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
+To undo the deletion of a table, please see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
 :::
 
 Syntax:

From 8e6cbc8b31c93e3825219cc47463c0e854b0a26d Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Wed, 3 Apr 2024 15:13:59 +0200
Subject: [PATCH 14/53] several fixes for client's keep alive connections

---
 .../Net/include/Poco/Net/HTTPClientSession.h  | 13 +
 base/poco/Net/include/Poco/Net/HTTPMessage.h  |  3 +
 .../Net/include/Poco/Net/HTTPServerParams.h   |  2 +-
 .../Net/include/Poco/Net/HTTPServerSession.h  |  2 +
 base/poco/Net/src/HTTPClientSession.cpp       | 39 +-
 base/poco/Net/src/HTTPMessage.cpp             | 41 ++
 base/poco/Net/src/HTTPServerConnection.cpp    | 13 +-
 base/poco/Net/src/HTTPServerSession.cpp       |  6 +
 src/Common/HTTPConnectionPool.cpp             | 94 ++--
 src/Common/tests/gtest_connection_pool.cpp    | 423 ++++++++++++++----
 src/Core/ServerSettings.h                     |  4 +-
 src/IO/ConnectionTimeouts.cpp                 |  6 +-
 12 files changed, 502 insertions(+), 144 deletions(-)

diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
index 1cef988566c..b418937c4d5 100644
--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@@ -213,6 +213,13 @@ namespace Net
         Poco::Timespan getKeepAliveTimeout() const;
         /// Returns the connection timeout for HTTP connections.
 
+        bool isKeepAliveExpired(double reliability = 1.0) const;
+        /// Returns true if the connection has expired, with a safety margin given as a fraction of the keep-alive timeout
+
+        double getKeepAliveReliability() const;
+        /// Returns the fraction of the keep-alive timeout during which the connection is considered safe to use
+        /// It helps to avoid the situation when a client uses a nearly expired connection and receives NoMessageException
+
         virtual std::ostream & sendRequest(HTTPRequest & request);
         /// Sends the header for the given HTTP request to
         /// the server.
@@ -361,6 +368,7 @@ namespace Net
         Poco::SharedPtr<std::ostream> _pRequestStream;
         Poco::SharedPtr<std::istream> _pResponseStream;
 
+        static const double _defaultKeepAliveReliabilityLevel;
         static ProxyConfig _globalProxyConfig;
 
         HTTPClientSession(const HTTPClientSession &);
@@ -455,6 +463,11 @@ namespace Net
         _lastRequest = time;
     }
 
+    inline double HTTPClientSession::getKeepAliveReliability() const
+    {
+        return _defaultKeepAliveReliabilityLevel;
+    }
+
 }
 } // namespace Poco::Net
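For orientation (an editorial sketch, not part of the patch): the reliability level acts as a safety margin on connection reuse. With the default `_defaultKeepAliveReliabilityLevel` of 0.9 defined below in HTTPClientSession.cpp, a session with a 10 second keep-alive timeout is treated as expired once roughly 9 seconds have passed since its last request; the connection pool in src/Common/HTTPConnectionPool.cpp applies even stricter fractions (0.1 under soft-limit pressure, 0.8 otherwise). A minimal illustration using only the methods added here:

```cpp
// Sketch only: decide whether a pooled session is still safe to reuse.
// With a 10 s keep-alive timeout and reliability 0.9, the session counts
// as expired ~9 s after the last request, leaving a margin before the
// server closes the idle connection and the client would otherwise hit
// a NoMessageException.
bool safeToReuse(const Poco::Net::HTTPClientSession & session)
{
    return session.connected()
        && !session.isKeepAliveExpired(session.getKeepAliveReliability());
}
```

diff --git a/base/poco/Net/include/Poco/Net/HTTPMessage.h b/base/poco/Net/include/Poco/Net/HTTPMessage.h
index 0bef50803a8..994807ffbff 100644
--- a/base/poco/Net/include/Poco/Net/HTTPMessage.h
+++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h
@@ -120,6 +120,9 @@ namespace Net
         /// The value is set to "Keep-Alive" if keepAlive is
         /// true, or to "Close" otherwise.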
+ void setKeepAliveTimeout(int timeout); + int getKeepAliveTimeout() const; + bool getKeepAlive() const; /// Returns true if /// * the message has a Connection header field and its value is "Keep-Alive" diff --git a/base/poco/Net/include/Poco/Net/HTTPServerParams.h b/base/poco/Net/include/Poco/Net/HTTPServerParams.h index 3c836a630a0..d614c62d57a 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerParams.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerParams.h @@ -44,7 +44,7 @@ namespace Net /// - timeout: 60 seconds /// - keepAlive: true /// - maxKeepAliveRequests: 0 - /// - keepAliveTimeout: 10 seconds + /// - keepAliveTimeout: 15 seconds void setServerName(const std::string & serverName); /// Sets the name and port (name:port) that the server uses to identify itself. diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index ec928af304f..3df7995509a 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -56,6 +56,8 @@ namespace Net SocketAddress serverAddress(); /// Returns the server's address. + void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout); + private: bool _firstRequest; Poco::Timespan _keepAliveTimeout; diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 33a3dcc4901..59800232ba9 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -37,6 +37,7 @@ namespace Net { HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig; +const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9; HTTPClientSession::HTTPClientSession(): @@ -220,7 +221,11 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config) void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) { - _keepAliveTimeout = timeout; + if (connected()) + { + throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection"); + } + _keepAliveTimeout = timeout; } @@ -243,6 +248,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) reconnect(); if (!keepAlive) request.setKeepAlive(false); + if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) + request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds()); if (!request.has(HTTPRequest::HOST) && !_host.empty()) request.setHost(_host, _port); if (!_proxyConfig.host.empty() && !bypassProxy()) @@ -324,6 +331,14 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) _mustReconnect = getKeepAlive() && !response.getKeepAlive(); + if (!_mustReconnect) + { + /// when server sends its keep alive timeout, client has to follow that value + auto timeout = response.getKeepAliveTimeout(); + if (timeout > 0) + _keepAliveTimeout = Poco::Timespan(timeout, 0); + } + if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) _pResponseStream = new HTTPFixedLengthInputStream(*this, 0); else if (response.getChunkedTransferEncoding()) @@ -430,15 +445,17 @@ std::string HTTPClientSession::proxyRequestPrefix() const return result; } +bool HTTPClientSession::isKeepAliveExpired(double reliability) const +{ + Poco::Timestamp now; + return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest; +} bool HTTPClientSession::mustReconnect() const 
{
     if (!_mustReconnect)
-    {
-        Poco::Timestamp now;
-        return _keepAliveTimeout <= now - _lastRequest;
-    }
-    else return true;
+        return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel);
+    return true;
 }
 
@@ -511,14 +528,16 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
     if (buffered())
         throw Poco::LogicException("assign to a session with not empty buffered data");
 
-    attachSocket(session.detachSocket());
     setLastRequest(session.getLastRequest());
     setResolvedHost(session.getResolvedHost());
-    setKeepAlive(session.getKeepAlive());
+    setProxyConfig(session.getProxyConfig());
 
     setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
-    setKeepAliveTimeout(session.getKeepAliveTimeout());
-    setProxyConfig(session.getProxyConfig());
+    setKeepAlive(session.getKeepAlive());
+    if (!connected())
+        setKeepAliveTimeout(session.getKeepAliveTimeout());
+
+    attachSocket(session.detachSocket());
 
     session.reset();
 }

diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp
index 0cd234ee9cb..2f974b8bf0b 100644
--- a/base/poco/Net/src/HTTPMessage.cpp
+++ b/base/poco/Net/src/HTTPMessage.cpp
@@ -17,6 +17,7 @@
 #include "Poco/NumberFormatter.h"
 #include "Poco/NumberParser.h"
 #include "Poco/String.h"
+#include <format>
+#include <charconv>
 
 
 using Poco::NumberFormatter;
@@ -179,4 +180,44 @@ bool HTTPMessage::getKeepAlive() const
 }
 
 
+void HTTPMessage::setKeepAliveTimeout(int timeout)
+{
+    add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}", timeout));
+}
+
+
+int parseTimeoutFromHeaderValue(const std::string_view header_value)
+{
+    static const std::string_view timeout_param = "timeout=";
+
+    auto timeout_pos = header_value.find(timeout_param);
+    if (timeout_pos == std::string::npos)
+        timeout_pos = header_value.size();
+    if (timeout_pos != header_value.size())
+        timeout_pos += timeout_param.size();
+
+    auto timeout_end = header_value.find(',', timeout_pos);
+    if (timeout_end == std::string::npos)
+        timeout_end = header_value.size();
+
+    auto timeout_value_substr = header_value.substr(timeout_pos, timeout_end - timeout_pos);
+    if (timeout_value_substr.empty())
+        return -1;
+
+    int value = 0;
+    auto [ptr, ec] = std::from_chars(timeout_value_substr.begin(), timeout_value_substr.end(), value);
+
+    if (ec == std::errc())
+        return value;
+
+    return -1;
+}
+
+
+int HTTPMessage::getKeepAliveTimeout() const
+{
+    const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
+    return parseTimeoutFromHeaderValue(ka_header);
+}
+
 }
 } // namespace Poco::Net

diff --git a/base/poco/Net/src/HTTPServerConnection.cpp b/base/poco/Net/src/HTTPServerConnection.cpp
index c57984b0162..d5eb29d3134 100644
--- a/base/poco/Net/src/HTTPServerConnection.cpp
+++ b/base/poco/Net/src/HTTPServerConnection.cpp
@@ -88,7 +88,18 @@ void HTTPServerConnection::run()
 
                     pHandler->handleRequest(request, response);
                     session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
-                }
+
+                    /// All this fuss is needed to make the session close with a shorter timeout than the 15s set in the HTTPServerParams c-tor
+                    if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive())
+                    {
+                        int value = response.getKeepAliveTimeout();
+                        if (value < 0)
+                            value = request.getKeepAliveTimeout();
+                        if (value > 0)
+                            session.setKeepAliveTimeout(Poco::Timespan(value, 0));
+                    }
+
+                }
                 else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED);
             }
             catch (Poco::Exception&)
             {
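To make the header mechanics above concrete, here is an assumed round trip over the new HTTPMessage accessors (a sketch with illustrative values; nothing in it is added by the patch itself):

```cpp
// The client announces its keep-alive timeout with the request ...
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, "/");
request.setKeepAliveTimeout(10);          // adds "Keep-Alive: timeout=10"

// ... and the server may answer with its own, possibly shorter, value.
Poco::Net::HTTPResponse response;
response.setKeepAliveTimeout(5);          // adds "Keep-Alive: timeout=5"

// Parsing tolerates extra parameters and reports -1 when no timeout is
// present: "timeout=5, max=100" -> 5, "max=100" -> -1, "" -> -1.
int server_timeout = response.getKeepAliveTimeout();
```

On the client side, `receiveResponse()` adopts the server's value into `_keepAliveTimeout`, so later `isKeepAliveExpired()` checks run against the last agreed timeout rather than the originally configured one.

diff --git a/base/poco/Net/src/HTTPServerSession.cpp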
b/base/poco/Net/src/HTTPServerSession.cpp index d4f2b24879e..f67a63a9e0e 100644 --- a/base/poco/Net/src/HTTPServerSession.cpp +++ b/base/poco/Net/src/HTTPServerSession.cpp @@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession() { } +void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout) +{ + _keepAliveTimeout = keepAliveTimeout; +} + + bool HTTPServerSession::hasMoreRequests() { diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index cd2505df7f3..21165bbc62d 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -83,17 +84,15 @@ namespace } - size_t roundUp(size_t x, size_t rounding) + constexpr size_t roundUp(size_t x, size_t rounding) { chassert(rounding > 0); - return (x + (rounding - 1)) / rounding * rounding; - } - - - Poco::Timespan divide(const Poco::Timespan span, int divisor) - { - return Poco::Timespan(Poco::Timestamp::TimeDiff(span.totalMicroseconds() / divisor)); + return (x + rounding) / rounding * rounding; } + static_assert(roundUp(10000, 100) == 10100); + static_assert(roundUp(10001, 100) == 10100); + static_assert(roundUp(10099, 100) == 10100); + static_assert(roundUp(10100, 100) == 10200); } namespace DB @@ -202,8 +201,9 @@ public: if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until) { - LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit); mute_warning_until = roundUp(total_connections_in_group, limits.warning_step); + LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}, next warning at {}", + type, total_connections_in_group, limits.warning_limit, mute_warning_until); } } @@ -213,7 +213,7 @@ public: --total_connections_in_group; - const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 10 : 1; + const size_t reduced_warning_limit = limits.warning_limit > 10 ? 
limits.warning_limit - 20 : 1;
         if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit)
         {
             LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group);
@@ -221,6 +221,12 @@ public:
         }
     }
 
+    void atPoolDestroy(size_t connections)
+    {
+        std::lock_guard lock(mutex);
+        total_connections_in_group -= connections;
+    }
+
     HTTPConnectionGroupType getType() const { return type; }
 
     const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const { return metrics; }
 
@@ -273,9 +279,15 @@ private:
 public:
     using Ptr = std::shared_ptr<PooledConnection>;
 
+    using Session::mustReconnect;
+
+    void markAsExpired()
+    {
+        isExpired = true;
+    }
+
     void reconnect() override
     {
-        ProfileEvents::increment(metrics.reset);
         Session::close();
 
         if (auto lock = pool.lock())
@@ -352,6 +364,11 @@ private:
         std::istream & result = Session::receiveResponse(response);
         result.exceptions(std::ios::badbit);
 
+        // this line is for temporary debugging and will be removed
+        if (response.has(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE))
+            LOG_WARNING(log, "received keep alive header: {}",
+                        response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY));
+
         response_stream = &result;
         response_stream_completed = false;
 
@@ -392,10 +409,11 @@ private:
         }
         response_stream = nullptr;
 
-        if (auto lock = pool.lock())
-            lock->atConnectionDestroy(*this);
-        else
-            ProfileEvents::increment(metrics.reset);
+        group->atConnectionDestroy();
+
+        if (!isExpired)
+            if (auto lock = pool.lock())
+                lock->atConnectionDestroy(*this);
 
         CurrentMetrics::sub(metrics.active_count);
     }
@@ -404,10 +422,11 @@ private:
     friend class EndpointConnectionPool;
 
     template <class... Args>
-    explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args)
-        : Session(args...), pool(std::move(pool_)), metrics(std::move(metrics_))
+    explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, ConnectionGroup::Ptr group_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args)
+        : Session(args...), pool(std::move(pool_)), group(group_), metrics(std::move(metrics_))
     {
         CurrentMetrics::add(metrics.active_count);
+        group->atConnectionCreate();
     }
 
     template <class... Args>
@@ -433,10 +452,12 @@
         return request_stream_completed && response_stream_completed;
     }
 
-    WeakPtr pool;
+    EndpointConnectionPool::WeakPtr pool;
+    ConnectionGroup::Ptr group;
     IHTTPConnectionPoolForEndpoint::Metrics metrics;
+    bool isExpired = false;
 
-    Poco::Logger * log = &Poco::Logger::get("PooledConnection");
+    LoggerPtr log = getLogger("PooledConnection");
 
     std::ostream * request_stream = nullptr;
     std::istream * response_stream = nullptr;
 
@@ -484,7 +505,6 @@ public:
     IHTTPConnectionPoolForEndpoint::ConnectionPtr getConnection(const ConnectionTimeouts & timeouts) override
     {
-        Poco::Timestamp now;
         std::vector<ConnectionPtr> expired_connections;
 
         SCOPE_EXIT({
@@ -494,8 +514,9 @@
 
         {
             std::lock_guard lock(mutex);
+            expired_connections.reserve(stored_connections.size());
 
-            wipeExpiredImpl(expired_connections, now);
+            wipeExpiredImpl(expired_connections);
 
             if (!stored_connections.empty())
             {
@@ -526,7 +547,6 @@
 
     size_t wipeExpired() override
     {
-        Poco::Timestamp now;
         std::vector<ConnectionPtr> expired_connections;
 
         SCOPE_EXIT({
@@ -535,19 +555,21 @@
         });
 
         std::lock_guard lock(mutex);
-        return wipeExpiredImpl(expired_connections, now);
+        return wipeExpiredImpl(expired_connections);
     }
 
-    size_t wipeExpiredImpl(std::vector<ConnectionPtr> & expired_connections, Poco::Timestamp now) TSA_REQUIRES(mutex)
+    size_t wipeExpiredImpl(std::vector<ConnectionPtr> & expired_connections) TSA_REQUIRES(mutex)
     {
+        auto isSoftLimitReached = group->isSoftLimitReached();
         while (!stored_connections.empty())
         {
             auto connection = stored_connections.top();
 
-            if (!isExpired(now, connection))
+            if (!isExpired(connection, isSoftLimitReached))
                 return stored_connections.size();
 
             stored_connections.pop();
+            connection->markAsExpired();
             expired_connections.push_back(connection);
         }
 
@@ -569,16 +591,16 @@ private:
     WeakPtr getWeakFromThis() { return EndpointConnectionPool::weak_from_this(); }
 
-    bool isExpired(Poco::Timestamp & now, ConnectionPtr connection)
+    bool isExpired(ConnectionPtr connection, bool isSoftLimitReached) TSA_REQUIRES(mutex)
     {
-        if (group->isSoftLimitReached())
-            return now > (connection->getLastRequest() + divide(connection->getKeepAliveTimeout(), 10));
-        return now > connection->getLastRequest() + connection->getKeepAliveTimeout();
+        if (isSoftLimitReached)
+            return connection->isKeepAliveExpired(0.1);
+        return connection->isKeepAliveExpired(0.8);
     }
 
     ConnectionPtr allocateNewConnection()
     {
-        ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), getMetrics(), host, port);
+        ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port);
         connection->setKeepAlive(true);
 
         if (!proxy_configuration.isEmpty())
         {
             connection->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration));
         }
 
-        group->atConnectionCreate();
-
         return connection;
     }
 
@@ -619,8 +639,6 @@
     void atConnectionDestroy(PooledConnection & connection)
     {
-        group->atConnectionDestroy();
-
         if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered()
             || group->isStoreLimitReached())
         {
 
         auto connection_to_store = allocateNewConnection();
         connection_to_store->assign(connection);
 
-        CurrentMetrics::add(getMetrics().stored_count, 1);
-        ProfileEvents::increment(getMetrics().preserved, 1);
-
         {
             MemoryTrackerSwitcher switcher{&total_memory_tracker};
             std::lock_guard lock(mutex);
             stored_connections.push(connection_to_store);
         }
+
+        CurrentMetrics::add(getMetrics().stored_count, 1);
+        ProfileEvents::increment(getMetrics().preserved, 1);
     }
 
@@ -726,7 +744,7 @@ createConnectionPool(ConnectionGroup::Ptr group, std::string host, UInt16 port,
 class HTTPConnectionPools::Impl
 {
 private:
-    const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 5 * 60;
+    const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 10 * 60;
     const Poco::Timespan wipe_timeout = Poco::Timespan(DEFAULT_WIPE_TIMEOUT_SECONDS, 0);
 
     ConnectionGroup::Ptr disk_group = std::make_shared<ConnectionGroup>(HTTPConnectionGroupType::DISK);

diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp
index dcc3c11fd52..36bf8bc7dae 100644
--- a/src/Common/tests/gtest_connection_pool.cpp
+++ b/src/Common/tests/gtest_connection_pool.cpp
@@ -2,7 +2,6 @@
 
 #include
 #include
-#include
 #include
 #include
 #include
@@ -17,6 +16,39 @@
 namespace
 {
 
+template <class T>
+class SafeHandler
+{
+public:
+    using Ptr = std::shared_ptr<SafeHandler<T>>;
+
+    SafeHandler() = default;
+    SafeHandler(SafeHandler&) = delete;
+    SafeHandler& operator=(SafeHandler&) = delete;
+
+    T get()
+    {
+        std::lock_guard lock(mutex);
+        return obj;
+    }
+
+    void set(T && options_)
+    {
+        std::lock_guard lock(mutex);
+        obj = std::move(options_);
+    }
+
+protected:
+    std::mutex mutex;
+    T obj = {};
+};
+
+struct RequestOptions
+{
+    size_t slowdown_receive = 0;
+    int overwrite_keep_alive_timeout = 0;
+};
+
 size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits<size_t>::max())
 {
     const size_t buffer_size = 4096;
@@ -47,13 +79,19 @@ size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count =
 class MockRequestHandler : public Poco::Net::HTTPRequestHandler
 {
 public:
-    explicit MockRequestHandler(std::shared_ptr<std::atomic<size_t>> slowdown_)
-        : slowdown(std::move(slowdown_))
+    explicit MockRequestHandler(SafeHandler<RequestOptions>::Ptr options_)
+        : options(options_)
     {
     }
 
     void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override
     {
+        int value = request.getKeepAliveTimeout();
+        ASSERT_GT(value, 0);
+
+        if (options->get().overwrite_keep_alive_timeout > 0)
+            response.setKeepAliveTimeout(options->get().overwrite_keep_alive_timeout);
+
         response.setStatus(Poco::Net::HTTPResponse::HTTP_OK);
         auto size = request.getContentLength();
         if (size > 0)
             response.setContentLength(size); // fixed size
         else
             response.setChunkedTransferEncoding(true); // or chunk encoding
-        sleepForSeconds(*slowdown);
+        if (options->get().slowdown_receive > 0)
+            sleepForSeconds(options->get().slowdown_receive);
         stream_copy_n(request.stream(), response.send(), size);
     }
 
-    std::shared_ptr<std::atomic<size_t>> slowdown;
+    SafeHandler<RequestOptions>::Ptr options;
 };
 
 class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
 {
 public:
-    explicit HTTPRequestHandlerFactory(std::shared_ptr<std::atomic<size_t>> slowdown_)
-        : slowdown(std::move(slowdown_))
+    explicit HTTPRequestHandlerFactory(SafeHandler<RequestOptions>::Ptr options_)
+        : options(options_)
     {
     }
 
     Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest &) override
     {
-        return new MockRequestHandler(slowdown);
+        return new MockRequestHandler(options);
     }
 
-    std::shared_ptr<std::atomic<size_t>> slowdown;
+    SafeHandler<RequestOptions>::Ptr options;
 };
 
 }
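`SafeHandler` is simply a mutex-guarded value holder shared between the test fixture and the mock server threads; the intended pattern is copy-in/copy-out, roughly as sketched here (illustrative values, not taken from the tests themselves):

```cpp
// Sketch: publish new options to the handler thread, then read a snapshot.
// get() hands out a copy, so no caller acts on the object while holding
// the lock.
auto opts = std::make_shared<SafeHandler<RequestOptions>>();

RequestOptions next;
next.slowdown_receive = 1;              // mock server sleeps 1 s before echoing
next.overwrite_keep_alive_timeout = 5;  // mock server replies "Keep-Alive: timeout=5"
opts->set(std::move(next));

RequestOptions snapshot = opts->get();  // consistent copy for this request
```

 class ConnectionPoolTest : public testing::Test
 {
 protected:
     ConnectionPoolTest()
     {
+        options = std::make_shared<SafeHandler<RequestOptions>>();
+
         startServer();
     }
 
     void SetUp() override
     {
         timeouts = DB::ConnectionTimeouts();
         DB::HTTPConnectionPools::Limits def_limits{};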
DB::HTTPConnectionPools::instance().setLimits(def_limits, def_limits, def_limits); - setSlowDown(0); + options->set(RequestOptions()); DB::HTTPConnectionPools::instance().dropCache(); DB::CurrentThread::getProfileEvents().reset(); @@ -129,7 +170,7 @@ protected: void startServer() { server_data.reset(); - server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive); + server_data.handler_factory = new HTTPRequestHandlerFactory(options); server_data.server = std::make_unique( server_data.handler_factory, server_data.port); @@ -143,11 +184,20 @@ protected: void setSlowDown(size_t seconds) { - *slowdown_receive = seconds; + auto opt = options->get(); + opt.slowdown_receive = seconds; + options->set(std::move(opt)); + } + + void setOverWriteTimeout(size_t seconds) + { + auto opt = options->get(); + opt.overwrite_keep_alive_timeout = int(seconds); + options->set(std::move(opt)); } DB::ConnectionTimeouts timeouts; - std::shared_ptr> slowdown_receive = std::make_shared>(0); + SafeHandler::Ptr options; struct ServerData { @@ -182,7 +232,7 @@ protected: void wait_until(std::function pred) { while (!pred()) - sleepForMilliseconds(250); + sleepForMilliseconds(10); } void echoRequest(String data, HTTPSession & session) @@ -245,45 +295,52 @@ TEST_F(ConnectionPoolTest, CanRequest) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + auto metrics = pool->getMetrics(); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanPreserve) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); } - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 1; }); ASSERT_EQ(1, getServer().currentConnections()); - - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); } TEST_F(ConnectionPoolTest, CanReuse) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); } - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); - { auto connection = pool->getConnection(timeouts); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, 
CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 1; }); ASSERT_EQ(1, getServer().currentConnections()); @@ -293,6 +350,11 @@ TEST_F(ConnectionPoolTest, CanReuse) ASSERT_EQ(1, getServer().totalConnections()); ASSERT_EQ(1, getServer().currentConnections()); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + connection->reset(); } @@ -303,15 +365,16 @@ TEST_F(ConnectionPoolTest, CanReuse) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); } TEST_F(ConnectionPoolTest, CanReuse10) { auto pool = getPool(); - + auto metrics = pool->getMetrics(); for (int i = 0; i < 10; ++i) { @@ -328,16 +391,23 @@ TEST_F(ConnectionPoolTest, CanReuse10) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReuse5) { - timeouts.withHTTPKeepAliveTimeout(1); + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector connections; connections.reserve(5); @@ -347,11 +417,14 @@ TEST_F(ConnectionPoolTest, CanReuse5) } connections.clear(); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(5, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(5, 
CurrentMetrics::get(metrics.stored_count));
 
     wait_until([&] () { return getServer().currentConnections() == 5; });
     ASSERT_EQ(5, getServer().currentConnections());
@@ -363,35 +436,56 @@ TEST_F(ConnectionPoolTest, CanReuse5)
         echoRequest("Hello", *connection);
     }
 
-    ASSERT_EQ(5, getServer().totalConnections());
+    ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.created]);
+    ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]);
+    ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.reused]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]);
 
-    ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
-    ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
-    ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
-    ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count));
-    ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count));
+    ASSERT_EQ(5, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(5, CurrentMetrics::get(metrics.stored_count));
+
+    /// wait until all connections are timed out
+    wait_until([&] () { return getServer().currentConnections() == 0; });
+
+    {
+        // just to trigger pool->wipeExpired();
+        auto connection = pool->getConnection(timeouts);
+        connection->reset();
+    }
+
+    ASSERT_EQ(6, DB::CurrentThread::getProfileEvents()[metrics.created]);
+    ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]);
+    ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.reused]);
+    ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]);
+    ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.expired]);
+
+    ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count));
 }
 
 TEST_F(ConnectionPoolTest, CanReconnectAndCreate)
 {
     auto pool = getPool();
+    auto metrics = pool->getMetrics();
 
     std::vector in_use;
 
-    const size_t count = 2;
+    const size_t count = 3;
     for (int i = 0; i < count; ++i)
     {
         auto connection = pool->getConnection(timeouts);
-        // DB::setReuseTag(*connection);
         in_use.push_back(connection);
     }
 
-    ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
-    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
-    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
+    ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[metrics.created]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]);
 
-    ASSERT_EQ(count, CurrentMetrics::get(pool->getMetrics().active_count));
-    ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
+    ASSERT_EQ(count, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count));
 
     auto connection = std::move(in_use.back());
     in_use.pop_back();
@@ -402,28 +496,39 @@ TEST_F(ConnectionPoolTest, CanReconnectAndCreate)
 
     echoRequest("Hello", *connection);
 
-    connection->reset();
+    ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[metrics.created]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]);
+    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]);
+    ASSERT_EQ(1,
DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - wait_until([&] () { return getServer().currentConnections() == 1; }); - ASSERT_EQ(1, getServer().currentConnections()); - ASSERT_EQ(count+1, getServer().totalConnections()); - - ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReconnectAndReuse) { + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector in_use; - const size_t count = 2; + const size_t count = 3; + for (int i = 0; i < count; ++i) + { + auto connection = pool->getConnection(timeouts); + /// make some request in order to show to the server the keep alive headers + echoRequest("Hello", *connection); + in_use.push_back(std::move(connection)); + } + in_use.clear(); + for (int i = 0; i < count; ++i) { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); in_use.push_back(std::move(connection)); } @@ -441,11 +546,16 @@ TEST_F(ConnectionPoolTest, CanReconnectAndReuse) wait_until([&] () { return getServer().currentConnections() == 0; }); ASSERT_EQ(0, getServer().currentConnections()); - ASSERT_EQ(2, getServer().totalConnections()); + ASSERT_EQ(count, getServer().totalConnections()); - ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(count + count - 1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(count + 1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(count-1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(count-2, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, ReceiveTimeout) @@ -454,6 +564,7 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) timeouts.withReceiveTimeout(1); auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); @@ -462,10 +573,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, 
CurrentMetrics::get(metrics.stored_count)); { timeouts.withReceiveTimeout(3); @@ -475,10 +590,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); { /// timeouts have effect for reused session @@ -489,10 +608,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) @@ -500,6 +623,7 @@ TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) std::string_view message = "Hello ReadWriteBufferFromHTTP"; auto uri = Poco::URI(getServerUrl()); auto metrics = DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{})->getMetrics(); + Poco::Net::HTTPBasicCredentials empty_creds; auto buf_from_http = DB::BuilderRWBufferFromHTTP(uri) .withConnectionGroup(DB::HTTPConnectionGroupType::HTTP) @@ -527,6 +651,7 @@ TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); @@ -538,23 +663,26 @@ TEST_F(ConnectionPoolTest, HardLimit) DB::HTTPConnectionPools::instance().setLimits(zero_limits, zero_limits, zero_limits); auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); } - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, 
DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, NoReceiveCall) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); @@ -570,11 +698,124 @@ TEST_F(ConnectionPoolTest, NoReceiveCall) connection->flushRequest(); } - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ReconnectedWhenConnectionIsHoldTooLong) +{ + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + + echoRequest("Hello", *connection); + + auto fake_ka = Poco::Timespan(30 * 1000 * 1000); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(fake_ka); + DB::setTimeouts(*connection, timeouts); // new keep alive timeout has no effect + + wait_until([&] () { return getServer().currentConnections() == 0; }); + + ASSERT_EQ(1, connection->connected()); + ASSERT_EQ(1, connection->getKeepAlive()); + ASSERT_EQ(1000, connection->getKeepAliveTimeout().totalMilliseconds()); + ASSERT_EQ(1, connection->isKeepAliveExpired(connection->getKeepAliveReliability())); + + echoRequest("Hello", *connection); + } + + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ReconnectedWhenConnectionIsNearlyExpired) +{ + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + + sleepForMilliseconds(900); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + } + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(2, 
DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + } + + { + setOverWriteTimeout(1); + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds()); + } + + { + // server do not overwrite it in the following requests but client has to remember last agreed value + setOverWriteTimeout(0); + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6608a35a5a2..8d6d8ebc1a2 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -128,9 +128,9 @@ namespace DB M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ - M(UInt64, disk_connections_soft_limit, 1000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ + M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \ - M(UInt64, disk_connections_store_limit, 12000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ + M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. 
The limit applies to the disks connections.", 0) \
     M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \
     M(UInt64, storage_connections_warn_limit, 1000, "Warning messages are written to the logs if the number of in-use connections is higher than this limit. The limit applies to the storages connections.", 0) \
     M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \
diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp
index c4b636103fe..8813c958185 100644
--- a/src/IO/ConnectionTimeouts.cpp
+++ b/src/IO/ConnectionTimeouts.cpp
@@ -144,7 +144,11 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method
 void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts)
 {
     session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout);
-    session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout);
+    /// we cannot change the keep alive timeout for already initiated connections
+    if (!session.connected())
+    {
+        session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout);
+    }
 }
 
 ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session)

From 922a14eaf1fd22d1a364ec285851c50cbb2ad54f Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Wed, 3 Apr 2024 15:33:35 +0200
Subject: [PATCH 15/53] fix stored_count metric

---
 src/Common/HTTPConnectionPool.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp
index 21165bbc62d..ab0ea1571d3 100644
--- a/src/Common/HTTPConnectionPool.cpp
+++ b/src/Common/HTTPConnectionPool.cpp
@@ -560,6 +560,11 @@ public:
 
     size_t wipeExpiredImpl(std::vector<ConnectionPtr> & expired_connections) TSA_REQUIRES(mutex)
     {
+        SCOPE_EXIT({
+            CurrentMetrics::sub(getMetrics().stored_count, expired_connections.size());
+            ProfileEvents::increment(getMetrics().expired, expired_connections.size());
+        });
+
         auto isSoftLimitReached = group->isSoftLimitReached();
         while (!stored_connections.empty())
         {
@@ -573,9 +578,6 @@ public:
             expired_connections.push_back(connection);
         }
 
-        CurrentMetrics::sub(getMetrics().stored_count, expired_connections.size());
-        ProfileEvents::increment(getMetrics().expired, expired_connections.size());
-
         return stored_connections.size();
     }
 

From 532d80e20b60987947ac11eb8c4991916742157f Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Wed, 3 Apr 2024 16:02:07 +0200
Subject: [PATCH 16/53] fix log level in debug code

---
 src/Common/HTTPConnectionPool.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp
index ab0ea1571d3..2d3a87dda6b 100644
--- a/src/Common/HTTPConnectionPool.cpp
+++ b/src/Common/HTTPConnectionPool.cpp
@@ -361,13 +361,16 @@ private:
 
     std::istream & receiveResponse(Poco::Net::HTTPResponse & response) override
     {
+        int originKA = Session::getKeepAliveTimeout().totalSeconds();
+
        std::istream & result = Session::receiveResponse(response);
        result.exceptions(std::ios::badbit);
 
        // that line is for temporary debug, will be removed
        if (response.has(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE))
-            LOG_WARNING(log, "received keep alive header: {}",
-                response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY));
+            LOG_INFO(log, "received keep alive header: {}, original was {}",
+                response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY),
+                originKA);
 
         response_stream = &result;
         response_stream_completed = false;

From 33aee0f599867da294cfc5327cc4ab932e761066 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Thu, 4 Apr 2024 13:00:51 +0200
Subject: [PATCH 17/53] Analyzer: Fix name resolution from parent scopes

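Identifier resolution from a parent scope previously supported only constant and CTE nodes. When the parent-scope expression is a scalar subquery that has already been executed, the analyzer replaces it with a __getScalar() function node, so resolving a name that referred to it failed with UNSUPPORTED_METHOD. Handle such a node as a constant. A minimal illustration of the affected query shape, distilled from the test added below (`a` is defined in the outer scope and used inside the subquery):

    WITH (SELECT v FROM vecs_Float32 LIMIT 1) AS a
    SELECT count(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_Float32);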

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp     |  8 +++++-
 ...alyzer_resolve_from_parent_scope.reference |  1 +
 ...033_analyzer_resolve_from_parent_scope.sql | 27 +++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference
 create mode 100644 tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index f5474ddb662..91832f6060d 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -3993,9 +3993,15 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
         }
         else if (resolved_identifier->as<ConstantNode>())
         {
-            lookup_result.resolved_identifier = resolved_identifier;
             return lookup_result;
         }
+        else if (auto * resolved_function = resolved_identifier->as<FunctionNode>())
+        {
+            /// Special case: scalar subquery was executed and replaced by __getScalar function.
+            /// Handle it as a constant.
+            if (resolved_function->getFunctionName() == "__getScalar")
+                return lookup_result;
+        }
 
         throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
             "Resolve identifier '{}' from parent scope only supported for constants and CTE. Actual {} node type {}. In scope {}",
diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference
new file mode 100644
index 00000000000..f599e28b8ab
--- /dev/null
+++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference
@@ -0,0 +1 @@
+10
diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql
new file mode 100644
index 00000000000..22f103c9bd5
--- /dev/null
+++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql
@@ -0,0 +1,27 @@
+CREATE TABLE vecs_Float32 (v Array(Float32)) ENGINE=Memory;
+INSERT INTO vecs_Float32
+SELECT v FROM (
+    SELECT
+        number AS n,
+        [
+            rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9),
+            rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), rand(n*10+19),
+            rand(n*10+20), rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29),
+            rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39),
+            rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49),
+            rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59),
+            rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69),
+            rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79),
+            rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89),
+            rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99),
+            rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109),
+            rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119),
+            rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129),
+            rand(n*10+130), rand(n*10+131), rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139),
+            rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149)
+        ] AS v
+    FROM system.numbers
+    LIMIT 10
+);
+
+WITH (SELECT v FROM vecs_Float32 limit 1) AS a SELECT count(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_Float32);

From b1bd34f66e82173bfc48c7e1a612a967562fcbc6 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Thu, 4 Apr 2024 20:25:49 +0000
Subject: [PATCH 18/53] fix

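IndexAccess::getValue() sized the key Values from the given part's own primary key index, implicitly assuming that every part has the same number of index columns loaded. If parts disagree, comparing range-boundary keys across parts can touch index columns that were never loaded for some of them. Track the minimum number of loaded index columns across all parts and read only that prefix (hence the new chassert). A sketch of the scenario, following the integration test added in the next commit, which restarts the server before querying:

    CREATE TABLE t (a UInt32, b UInt32) ENGINE = MergeTree ORDER BY (a, b) SETTINGS index_granularity = 1;
    INSERT INTO t SELECT 42, number FROM numbers_mt(100);
    INSERT INTO t SELECT number, number FROM numbers_mt(100);
    -- restart the server, then query with the range-splitting settings from the test:
    SELECT count() FROM t WHERE NOT ignore(*);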

---
 src/Processors/QueryPlan/PartsSplitter.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp
index 2af1bcb0260..ec51875587e 100644
--- a/src/Processors/QueryPlan/PartsSplitter.cpp
+++ b/src/Processors/QueryPlan/PartsSplitter.cpp
@@ -1,4 +1,5 @@
 #include
+#include <limits>
 #include
 #include
 #include
@@ -125,14 +126,18 @@ int compareValues(const Values & lhs, const Values & rhs)
 class IndexAccess
 {
 public:
-    explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { }
+    explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_)
+    {
+        for (const auto & part : parts)
+            loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size());
+    }
 
     Values getValue(size_t part_idx, size_t mark) const
     {
         const auto & index = parts[part_idx].data_part->getIndex();
-        size_t size = index.size();
-        Values values(size);
-        for (size_t i = 0; i < size; ++i)
+        chassert(index.size() >= loaded_columns);
+        Values values(loaded_columns);
+        for (size_t i = 0; i < loaded_columns; ++i)
         {
             index[i]->get(mark, values[i]);
             if (values[i].isNull())
@@ -199,6 +204,7 @@ public:
     }
 private:
     const RangesInDataParts & parts;
+    size_t loaded_columns = std::numeric_limits<size_t>::max();
 };
 
 class RangesInDataPartsBuilder

From 6be747bf32a7f1fcd9fee8f86c72dd2b03e48c02 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Thu, 4 Apr 2024 22:28:29 +0000
Subject: [PATCH 19/53] add test

---
 .../__init__.py                               |  0
 .../test.py                                   | 47 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/__init__.py
 create mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/test.py

diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py b/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py
new file mode 100644
index 00000000000..e710b9942dc
--- /dev/null
+++ b/tests/integration/test_final_bug_with_pk_columns_loading/test.py
@@ -0,0 +1,47 @@
+import pytest
+import logging
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node", stay_alive=True)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        logging.info("Starting cluster...")
+        cluster.start()
+        logging.info("Cluster started")
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_simple_query_after_restart(start_cluster):
+    node.query(
+        """
+        create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1;
+
+        insert into t select 42, number from numbers_mt(100);
+        insert into t select number, number from numbers_mt(100);
+        """
+    )
+
+    node.restart_clickhouse()
+
+    assert (
+        int(
+            node.query(
+                "select count() from t where not ignore(*)",
+                settings={
+                    "max_threads": 4,
+                    "merge_tree_min_bytes_for_concurrent_read": 1,
+                    "merge_tree_min_rows_for_concurrent_read": 1,
+                    "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": 1,
+                },
+            )
+        )
+        == 200
+    )

From dd852da33925af8ba52a89034da774d512add241 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 4 Apr 2024 15:57:42 +0200
Subject: [PATCH 20/53] add more debug logs

---
 .../Net/include/Poco/Net/HTTPClientSession.h  |  5 --
 base/poco/Net/src/HTTPClientSession.cpp       | 26 +++++--
 base/poco/Net/src/HTTPMessage.cpp             |  2 +-
 src/Common/HTTPConnectionPool.cpp             | 68 ++++++++++++++---
 src/Disks/ObjectStorages/S3/diskSettings.cpp  |  2 +
 src/IO/S3/Client.h                            |  8 +--
 src/IO/S3/PocoHTTPClient.cpp                  |  3 +-
 src/IO/S3/PocoHTTPClient.h                    |  1 +
 src/IO/S3/tests/gtest_aws_s3_client.cpp       |  2 +-
 9 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
index b418937c4d5..cbf4619834b 100644
--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@@ -458,11 +458,6 @@ namespace Net
         return _lastRequest;
     }
 
-    inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
-    {
-        _lastRequest = time;
-    }
-
     inline double HTTPClientSession::getKeepAliveReliability() const
     {
         return _defaultKeepAliveReliabilityLevel;
diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp
index 59800232ba9..afa1eff68a2 100644
--- a/base/poco/Net/src/HTTPClientSession.cpp
+++ b/base/poco/Net/src/HTTPClientSession.cpp
@@ -223,12 +223,24 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
 {
     if (connected())
     {
-        throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection");
+        throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection; "
+                                          "that value is managed privately after the connection is established.");
     }
     _keepAliveTimeout = timeout;
 }
 
 
+void HTTPClientSession::setLastRequest(Poco::Timestamp time)
+{
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change last request on initiated connection; "
+                                          "that value is managed privately after the connection is established.");
+    }
+    _lastRequest = 
time; +} + + std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { _pRequestStream = 0; @@ -246,8 +258,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { if (!connected()) reconnect(); - if (!keepAlive) - request.setKeepAlive(false); + if (!request.has(HTTPMessage::CONNECTION)) + request.setKeepAlive(keepAlive); if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds()); if (!request.has(HTTPRequest::HOST) && !_host.empty()) @@ -528,14 +540,16 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) if (buffered()) throw Poco::LogicException("assign to a session with not empty buffered data"); - setLastRequest(session.getLastRequest()); + poco_assert(!connected()); + setResolvedHost(session.getResolvedHost()); setProxyConfig(session.getProxyConfig()); setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout()); setKeepAlive(session.getKeepAlive()); - if (!connected()) - setKeepAliveTimeout(session.getKeepAliveTimeout()); + + setLastRequest(session.getLastRequest()); + setKeepAliveTimeout(session.getKeepAliveTimeout()); attachSocket(session.detachSocket()); diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index 2f974b8bf0b..af743dfa2eb 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -182,7 +182,7 @@ bool HTTPMessage::getKeepAlive() const void HTTPMessage::setKeepAliveTimeout(int timeout) { - add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}", timeout)); + add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max=1000", timeout)); } diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 2d3a87dda6b..f64d6658a55 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -193,6 +193,18 @@ public: return total_connections_in_group >= limits.store_limit; } + size_t getStored() const + { + std::lock_guard lock(mutex); + return total_connections_in_group; + } + + size_t getStoreLimit() const + { + std::lock_guard lock(mutex); + return limits.store_limit; + } + void atConnectionCreate() { std::lock_guard lock(mutex); @@ -221,12 +233,6 @@ public: } } - void atPoolDestroy(size_t connections) - { - std::lock_guard lock(mutex); - total_connections_in_group -= connections; - } - HTTPConnectionGroupType getType() const { return type; } const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const { return metrics; } @@ -345,11 +351,29 @@ private: Session::flushRequest(); } + String printAllHeaders(Poco::Net::HTTPMessage & message) const + { + String out; + out.reserve(300); + for (auto & [k, v] : message) + { + out.append(fmt::format("<{}: {}> ", k, v)); + } + return out; + } + std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override { std::ostream & result = Session::sendRequest(request); result.exceptions(std::ios::badbit); + // that line is for temporary debug, will be removed + LOG_INFO(log, "Send request to {} with: usage count {}, keep-alive timeout={}, headers: {}", + getTarget(), + usage_cnt, + Session::getKeepAliveTimeout().totalSeconds(), + printAllHeaders(request)); + request_stream = &result; request_stream_completed = false; @@ -368,9 +392,12 @@ private: // that line is for temporary debug, will be removed if (response.has(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE)) - LOG_INFO(log, "received keep alive 
header: {}, original was {}", + LOG_INFO(log, "Received response from {} with: usage count {}, keep alive header: {}, original ka {}, headers: {}", + getTarget(), + usage_cnt, response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY), - originKA); + originKA, + printAllHeaders(response)); response_stream = &result; response_stream_completed = false; @@ -415,8 +442,19 @@ private: group->atConnectionDestroy(); if (!isExpired) + { if (auto lock = pool.lock()) lock->atConnectionDestroy(*this); + } + else + { + Poco::Timestamp now; + LOG_INFO(log, "Expired connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}", + getTarget(), + usage_cnt, + Session::getKeepAliveTimeout().totalSeconds(), + Poco::Timespan(now - Session::getLastRequest()).totalSeconds()); + } CurrentMetrics::sub(metrics.active_count); } @@ -459,6 +497,7 @@ private: ConnectionGroup::Ptr group; IHTTPConnectionPoolForEndpoint::Metrics metrics; bool isExpired = false; + size_t usage_cnt = 1; LoggerPtr log = getLogger("PooledConnection"); @@ -527,6 +566,8 @@ public: stored_connections.pop(); setTimeouts(*it, timeouts); + it->usage_cnt += 1; + ProfileEvents::increment(getMetrics().reused, 1); CurrentMetrics::sub(getMetrics().stored_count, 1); @@ -647,12 +688,23 @@ private: if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered() || group->isStoreLimitReached()) { + Poco::Timestamp now; + LOG_INFO(getLogger("PooledConnection"), + "Reset connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}", + getTarget(), + connection.usage_cnt, + connection.getKeepAliveTimeout().totalSeconds(), + Poco::Timespan(now - connection.getLastRequest()).totalSeconds(), + connection.isCompleted(), + group->isStoreLimitReached(), group->getStored(), group->getStoreLimit()); + ProfileEvents::increment(getMetrics().reset, 1); return; } auto connection_to_store = allocateNewConnection(); connection_to_store->assign(connection); + connection_to_store->usage_cnt = connection.usage_cnt; { MemoryTrackerSwitcher switcher{&total_memory_tracker}; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index df1ccbb32d9..7ce94699053 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -76,6 +76,8 @@ std::unique_ptr getClient( client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); + client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + client_configuration.endpointOverride = uri.endpoint; client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index c7bc727bf32..c79ec05c8c6 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -96,9 +96,9 @@ bool isS3ExpressEndpoint(const std::string & endpoint); struct ClientSettings { - bool use_virtual_addressing; + bool use_virtual_addressing = false; /// Disable checksum to avoid extra 
read of the input stream - bool disable_checksum; + bool disable_checksum = false; /// Should client send ComposeObject request after upload to GCS. /// /// Previously ComposeObject request was required to make Copy possible, @@ -108,8 +108,8 @@ struct ClientSettings /// /// Ability to enable it preserved since likely it is required for old /// files. - bool gcs_issue_compose_request; - bool is_s3express_bucket; + bool gcs_issue_compose_request = false; + bool is_s3express_bucket = false; }; /// Client that improves the client from the AWS SDK diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index a29a4b0b8ee..150b8146147 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -146,7 +146,8 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withTCPKeepAliveTimeout(Poco::Timespan( - client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)); + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) + .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index ebbddbb2c7e..f568eb5ddb8 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -51,6 +51,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration /// See PoolBase::BehaviourOnLimit bool s3_use_adaptive_timeouts = true; + size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; std::function error_report; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 25786619241..0a28c578f69 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -159,7 +159,7 @@ void testServerSideEncryption( DB::S3::CredentialsConfiguration { .use_environment_credentials = use_environment_credentials, - .use_insecure_imds_request = use_insecure_imds_request + .use_insecure_imds_request = use_insecure_imds_request, } ); From 5cab8d185fb5ad1f8607a4ad7140a15469754e99 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Apr 2024 19:29:42 +0200 Subject: [PATCH 21/53] more details --- base/poco/Net/src/HTTPClientSession.cpp | 2 +- src/Common/HTTPConnectionPool.cpp | 42 ++++++++++++++++--------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index afa1eff68a2..bc70559c5eb 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -348,7 +348,7 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) /// when server sends its keep alive timeout, client has to follow that value auto timeout = response.getKeepAliveTimeout(); if (timeout > 0) - _keepAliveTimeout = Poco::Timespan(timeout, 0); + _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0)); } if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 
f64d6658a55..eb6ce00e611 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -322,6 +322,11 @@ private: Session::getPort()); } + Poco::Timespan idleTime() { + Poco::Timestamp now; + return now - Session::getLastRequest(); + } + void flushRequest() override { if (bool(request_stream)) @@ -364,14 +369,18 @@ private: std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override { + auto idle = idleTime(); std::ostream & result = Session::sendRequest(request); result.exceptions(std::ios::badbit); // that line is for temporary debug, will be removed - LOG_INFO(log, "Send request to {} with: usage count {}, keep-alive timeout={}, headers: {}", + LOG_INFO(log, "Send request to {} with: version {}, method {}, usage count {}, keep-alive timeout={}, last usage ago: {}ms, headers: {}", + request.getVersion(), + request.getMethod(), getTarget(), usage_cnt, Session::getKeepAliveTimeout().totalSeconds(), + idle.totalMilliseconds(), printAllHeaders(request)); request_stream = &result; @@ -391,13 +400,15 @@ private: result.exceptions(std::ios::badbit); // that line is for temporary debug, will be removed - if (response.has(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE)) - LOG_INFO(log, "Received response from {} with: usage count {}, keep alive header: {}, original ka {}, headers: {}", - getTarget(), - usage_cnt, - response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY), - originKA, - printAllHeaders(response)); + LOG_INFO(log, "Received response from {} with: version {}, code {}, usage count {}, keep alive header: {}, original ka {}, last usage ago: {}ms, headers: {}", + getTarget(), + response.getVersion(), + int(response.getStatus()), + usage_cnt, + response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY), + originKA, + idleTime().totalMilliseconds(), + printAllHeaders(response)); response_stream = &result; response_stream_completed = false; @@ -449,11 +460,11 @@ private: else { Poco::Timestamp now; - LOG_INFO(log, "Expired connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}", + LOG_INFO(log, "Expired connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}s", getTarget(), usage_cnt, Session::getKeepAliveTimeout().totalSeconds(), - Poco::Timespan(now - Session::getLastRequest()).totalSeconds()); + idleTime().totalSeconds()); } CurrentMetrics::sub(metrics.active_count); @@ -498,6 +509,7 @@ private: IHTTPConnectionPoolForEndpoint::Metrics metrics; bool isExpired = false; size_t usage_cnt = 1; + size_t exception_level = std::uncaught_exceptions(); LoggerPtr log = getLogger("PooledConnection"); @@ -568,7 +580,6 @@ public: setTimeouts(*it, timeouts); it->usage_cnt += 1; - ProfileEvents::increment(getMetrics().reused, 1); CurrentMetrics::sub(getMetrics().stored_count, 1); @@ -690,13 +701,16 @@ private: { Poco::Timestamp now; LOG_INFO(getLogger("PooledConnection"), - "Reset connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}", + "Reset connection to {} with: usage count {}, keep alive timeout: {}, connected {}, must recon {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}, there is exception {}", getTarget(), connection.usage_cnt, connection.getKeepAliveTimeout().totalSeconds(), - Poco::Timespan(now - connection.getLastRequest()).totalSeconds(), + connection.connected(), + connection.mustReconnect(), + connection.idleTime().totalSeconds(), 
connection.isCompleted(), - group->isStoreLimitReached(), group->getStored(), group->getStoreLimit()); + group->isStoreLimitReached(), group->getStored(), group->getStoreLimit(), + connection.exception_level - std::uncaught_exceptions()); ProfileEvents::increment(getMetrics().reset, 1); return; From ae3a1999398b4f16880e2d892cb11bb414944b81 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Apr 2024 22:49:52 +0200 Subject: [PATCH 22/53] support max requests for keep alive --- .../Net/include/Poco/Net/HTTPClientSession.h | 20 ++++ base/poco/Net/include/Poco/Net/HTTPMessage.h | 3 +- base/poco/Net/src/HTTPClientSession.cpp | 33 +++++- base/poco/Net/src/HTTPMessage.cpp | 37 ++++--- src/Common/HTTPConnectionPool.cpp | 69 +++++++----- src/Common/tests/gtest_connection_pool.cpp | 103 ++++++++++++++++-- src/Core/Defines.h | 1 + src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 +- src/IO/ConnectionTimeouts.cpp | 1 + src/IO/ConnectionTimeouts.h | 2 + src/IO/S3/Credentials.h | 2 + src/IO/S3/PocoHTTPClient.h | 1 + 12 files changed, 219 insertions(+), 56 deletions(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index cbf4619834b..edbb135d8c6 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -213,6 +213,12 @@ namespace Net Poco::Timespan getKeepAliveTimeout() const; /// Returns the connection timeout for HTTP connections. + void setKeepAliveMaxRequests(int max_requests); + + int getKeepAliveMaxRequests() const; + + int getKeepAliveRequest() const; + bool isKeepAliveExpired(double reliability = 1.0) const; /// Returns if the connection is expired with some margin as fraction of timeout as reliability @@ -352,6 +358,8 @@ namespace Net void assign(HTTPClientSession & session); + void setKeepAliveRequest(int request); + HTTPSessionFactory _proxySessionFactory; /// Factory to create HTTPClientSession to proxy. private: @@ -360,6 +368,8 @@ namespace Net Poco::UInt16 _port; ProxyConfig _proxyConfig; Poco::Timespan _keepAliveTimeout; + int _keepAliveCurrentRequest = 0; + int _keepAliveMaxRequests = 1000; Poco::Timestamp _lastRequest; bool _reconnect; bool _mustReconnect; @@ -463,6 +473,16 @@ namespace Net return _defaultKeepAliveReliabilityLevel; } + inline int HTTPClientSession::getKeepAliveMaxRequests() const + { + return _keepAliveMaxRequests; + } + + inline int HTTPClientSession::getKeepAliveRequest() const + { + return _keepAliveCurrentRequest; + } + } } // namespace Poco::Net diff --git a/base/poco/Net/include/Poco/Net/HTTPMessage.h b/base/poco/Net/include/Poco/Net/HTTPMessage.h index 994807ffbff..8bc95ccc1af 100644 --- a/base/poco/Net/include/Poco/Net/HTTPMessage.h +++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h @@ -120,8 +120,9 @@ namespace Net /// The value is set to "Keep-Alive" if keepAlive is /// true, or to "Close" otherwise. 
- void setKeepAliveTimeout(int timeout); + void setKeepAliveTimeout(int timeout, int max_requests); int getKeepAliveTimeout() const; + int getKeepAliveMaxRequests() const; bool getKeepAlive() const; /// Returns true if diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index bc70559c5eb..e489ab56b98 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -230,7 +230,25 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) } -void HTTPClientSession::setLastRequest(Poco::Timestamp time) +void HTTPClientSession::setKeepAliveMaxRequests(int max_requests) +{ + if (connected()) + { + throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, " + "That value is managed privately after connection is established."); + } + _keepAliveMaxRequests = max_requests; +} + + +void HTTPClientSession::setKeepAliveRequest(int request) +{ + _keepAliveCurrentRequest = request; +} + + + + void HTTPClientSession::setLastRequest(Poco::Timestamp time) { if (connected()) { @@ -248,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) clearException(); _responseReceived = false; + _keepAliveCurrentRequest += 1; + bool keepAlive = getKeepAlive(); if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty()) { @@ -261,7 +281,7 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) if (!request.has(HTTPMessage::CONNECTION)) request.setKeepAlive(keepAlive); if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) - request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds()); + request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests); if (!request.has(HTTPRequest::HOST) && !_host.empty()) request.setHost(_host, _port); if (!_proxyConfig.host.empty() && !bypassProxy()) @@ -349,6 +369,9 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) auto timeout = response.getKeepAliveTimeout(); if (timeout > 0) _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0)); + auto max_requests = response.getKeepAliveMaxRequests(); + if (max_requests > 0) + _keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests); } if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) @@ -460,7 +483,8 @@ std::string HTTPClientSession::proxyRequestPrefix() const bool HTTPClientSession::isKeepAliveExpired(double reliability) const { Poco::Timestamp now; - return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest; + return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest + || _keepAliveCurrentRequest > _keepAliveMaxRequests; } bool HTTPClientSession::mustReconnect() const @@ -551,6 +575,9 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) setLastRequest(session.getLastRequest()); setKeepAliveTimeout(session.getKeepAliveTimeout()); + _keepAliveMaxRequests = session._keepAliveMaxRequests; + _keepAliveCurrentRequest = session._keepAliveCurrentRequest; + attachSocket(session.detachSocket()); session.reset(); diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index af743dfa2eb..c0083ec410c 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ 
b/base/poco/Net/src/HTTPMessage.cpp @@ -180,27 +180,25 @@ bool HTTPMessage::getKeepAlive() const } -void HTTPMessage::setKeepAliveTimeout(int timeout) +void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests) { - add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max=1000", timeout)); + add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests)); } -int parseTimeoutFromHeaderValue(const std::string_view header_value) +int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name) { - static const std::string_view timeout_param = "timeout="; + auto param_value_pos = header_value.find(param_name); + if (param_value_pos == std::string::npos) + param_value_pos = header_value.size(); + if (param_value_pos != header_value.size()) + param_value_pos += param_name.size(); - auto timeout_pos = header_value.find(timeout_param); - if (timeout_pos == std::string::npos) - timeout_pos = header_value.size(); - if (timeout_pos != header_value.size()) - timeout_pos += timeout_param.size(); + auto param_value_end = header_value.find(',', param_value_pos); + if (param_value_end == std::string::npos) + param_value_end = header_value.size(); - auto timeout_end = header_value.find(',', timeout_pos); - if (timeout_end == std::string::npos) - timeout_end = header_value.size(); - - auto timeout_value_substr = header_value.substr(timeout_pos, timeout_end - timeout_pos); + auto timeout_value_substr = header_value.substr(param_value_pos, param_value_end - param_value_pos); if (timeout_value_substr.empty()) return -1; @@ -217,7 +215,16 @@ int parseTimeoutFromHeaderValue(const std::string_view header_value) int HTTPMessage::getKeepAliveTimeout() const { const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); - return parseTimeoutFromHeaderValue(ka_header); + static const std::string_view timeout_param = "timeout="; + return parseFromHeaderValues(ka_header, timeout_param); +} + + +int HTTPMessage::getKeepAliveMaxRequests() const +{ + const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); + static const std::string_view timeout_param = "max="; + return parseFromHeaderValues(ka_header, timeout_param); } } } // namespace Poco::Net diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index eb6ce00e611..926222934e4 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -301,6 +301,8 @@ private: auto timeouts = getTimeouts(*this); auto new_connection = lock->getConnection(timeouts); Session::assign(*new_connection); + if (Session::getKeepAliveRequest() == 0) + Session::setKeepAliveRequest(1); } else { @@ -322,7 +324,8 @@ private: Session::getPort()); } - Poco::Timespan idleTime() { + Poco::Timespan idleTime() + { Poco::Timestamp now; return now - Session::getLastRequest(); } @@ -374,11 +377,11 @@ private: result.exceptions(std::ios::badbit); // that line is for temporary debug, will be removed - LOG_INFO(log, "Send request to {} with: version {}, method {}, usage count {}, keep-alive timeout={}, last usage ago: {}ms, headers: {}", + LOG_INFO(log, "Send request to {} with: version {}, method {}, request no {}, keep-alive timeout={}, last usage ago: {}ms, headers: {}", request.getVersion(), request.getMethod(), getTarget(), - usage_cnt, + Session::getKeepAliveRequest(), Session::getKeepAliveTimeout().totalSeconds(), idle.totalMilliseconds(), printAllHeaders(request)); @@ -400,11 +403,11 @@ 
private: result.exceptions(std::ios::badbit); // that line is for temporary debug, will be removed - LOG_INFO(log, "Received response from {} with: version {}, code {}, usage count {}, keep alive header: {}, original ka {}, last usage ago: {}ms, headers: {}", + LOG_INFO(log, "Received response from {} with: version {}, code {}, request no {}, keep alive header: {}, original ka {}, last usage ago: {}ms, headers: {}", getTarget(), response.getVersion(), int(response.getStatus()), - usage_cnt, + Session::getKeepAliveRequest(), response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY), originKA, idleTime().totalMilliseconds(), @@ -460,9 +463,9 @@ private: else { Poco::Timestamp now; - LOG_INFO(log, "Expired connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}s", + LOG_INFO(log, "Expired connection to {} with: request no {}, keep alive timeout: {}, last usage ago: {}s", getTarget(), - usage_cnt, + Session::getKeepAliveRequest(), Session::getKeepAliveTimeout().totalSeconds(), idleTime().totalSeconds()); } @@ -474,8 +477,15 @@ private: friend class EndpointConnectionPool; template - explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, ConnectionGroup::Ptr group_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args) - : Session(args...), pool(std::move(pool_)), group(group_), metrics(std::move(metrics_)) + explicit PooledConnection( + EndpointConnectionPool::WeakPtr pool_, + ConnectionGroup::Ptr group_, + IHTTPConnectionPoolForEndpoint::Metrics metrics_, + Args &&... args) + : Session(args...) + , pool(std::move(pool_)) + , group(group_) + , metrics(std::move(metrics_)) { CurrentMetrics::add(metrics.active_count); group->atConnectionCreate(); @@ -508,7 +518,7 @@ private: ConnectionGroup::Ptr group; IHTTPConnectionPoolForEndpoint::Metrics metrics; bool isExpired = false; - size_t usage_cnt = 1; + size_t exception_level = std::uncaught_exceptions(); LoggerPtr log = getLogger("PooledConnection"); @@ -578,7 +588,6 @@ public: stored_connections.pop(); setTimeouts(*it, timeouts); - it->usage_cnt += 1; ProfileEvents::increment(getMetrics().reused, 1); CurrentMetrics::sub(getMetrics().stored_count, 1); @@ -655,47 +664,50 @@ private: return connection->isKeepAliveExpired(0.8); } - ConnectionPtr allocateNewConnection() + + ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) { - ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port); + auto connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port); + connection->setKeepAlive(true); + setTimeouts(*connection, timeouts); if (!proxy_configuration.isEmpty()) { connection->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); } - return connection; - } - - ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) - { auto address = HostResolversPool::instance().getResolver(host)->resolve(); - - auto session = allocateNewConnection(); - - setTimeouts(*session, timeouts); - session->setResolvedHost(*address); + connection->setResolvedHost(*address); try { auto timer = CurrentThread::getProfileEvents().timer(getMetrics().elapsed_microseconds); - session->doConnect(); + connection->doConnect(); } catch (...) 
        {
            address.setFail();
            ProfileEvents::increment(getMetrics().errors);
-            session->reset();
+            connection->reset();
            throw;
        }
 
        ProfileEvents::increment(getMetrics().created);
-        return session;
+        return connection;
    }
 
    void atConnectionDestroy(PooledConnection & connection)
    {
+        if (connection.getKeepAliveRequest() >= connection.getKeepAliveMaxRequests())
+        {
+            LOG_INFO(getLogger("PooledConnection"), "Expired by connection number {}",
+                connection.getKeepAliveRequest());
+
+            ProfileEvents::increment(getMetrics().expired, 1);
+            return;
+        }
+
        if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered()
            || group->isStoreLimitReached())
        {
@@ -703,7 +715,7 @@ private:
            LOG_INFO(getLogger("PooledConnection"),
                "Reset connection to {} with: usage count {}, keep alive timeout: {}, connected {}, must recon {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}, there is exception {}",
                getTarget(),
-                connection.usage_cnt,
+                connection.getKeepAliveRequest(),
                connection.getKeepAliveTimeout().totalSeconds(),
@@ -716,9 +728,8 @@ private:
            ProfileEvents::increment(getMetrics().reset, 1);
            return;
        }
 
-        auto connection_to_store = allocateNewConnection();
+        auto connection_to_store = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port);
        connection_to_store->assign(connection);
-        connection_to_store->usage_cnt = connection.usage_cnt;
 
        {
            MemoryTrackerSwitcher switcher{&total_memory_tracker};
diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp
index 36bf8bc7dae..cc091d12bb0 100644
--- a/src/Common/tests/gtest_connection_pool.cpp
+++ b/src/Common/tests/gtest_connection_pool.cpp
@@ -47,6 +47,7 @@ struct RequestOptions
 {
     size_t slowdown_receive = 0;
     int overwrite_keep_alive_timeout = 0;
+    int overwrite_keep_alive_max_requests = 10;
 };
 
 size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits<std::size_t>::max())
@@ -89,8 +90,10 @@ public:
         int value = request.getKeepAliveTimeout();
         ASSERT_GT(value, 0);
 
-        if (options->get().overwrite_keep_alive_timeout > 0)
-            response.setKeepAliveTimeout(options->get().overwrite_keep_alive_timeout);
+        auto params = options->get();
+
+        if (params.overwrite_keep_alive_timeout > 0)
+            response.setKeepAliveTimeout(params.overwrite_keep_alive_timeout, params.overwrite_keep_alive_max_requests);
 
         response.setStatus(Poco::Net::HTTPResponse::HTTP_OK);
 
         auto size = request.getContentLength();
         if (size > 0)
             response.setContentLength(size);
         else
             response.setChunkedTransferEncoding(true); // or chunk encoding
 
-        if (options->get().slowdown_receive > 0)
-            sleepForSeconds(options->get().slowdown_receive);
+        if (params.slowdown_receive > 0)
+            sleepForSeconds(params.slowdown_receive);
 
         stream_copy_n(request.stream(), response.send(), size);
     }
@@ -189,10 +192,11 @@ protected:
         options->set(std::move(opt));
     }
 
-    void setOverWriteTimeout(size_t seconds)
+    void setOverWriteKeepAlive(size_t seconds, int max_requests)
     {
         auto opt = options->get();
         opt.overwrite_keep_alive_timeout = int(seconds);
+        opt.overwrite_keep_alive_max_requests = max_requests;
         options->set(std::move(opt));
     }
 
@@ -794,7 +798,7 @@
     }
 
     {
-        setOverWriteTimeout(1);
+        setOverWriteKeepAlive(1, 10);
         auto connection = pool->getConnection(timeouts);
         echoRequest("Hello", *connection);
         ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds());
         ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds());
     }
 
     {
         // server do not overwrite it in the 
following requests but client has to remember last agreed value - setOverWriteTimeout(0); + setOverWriteKeepAlive(0, 0); auto connection = pool->getConnection(timeouts); echoRequest("Hello", *connection); ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); @@ -819,3 +823,88 @@ TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); } + +TEST_F(ConnectionPoolTest, MaxRequests) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + auto max_requests = 5; + timeouts.http_keep_alive_max_requests = max_requests; + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + for (int i = 1; i <= max_requests - 1; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(i, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(max_requests-2, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(max_requests, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} + + +TEST_F(ConnectionPoolTest, ServerOverwriteMaxRequests) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(1000, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(1, connection->getKeepAliveRequest()); + } + + auto max_requests = 3; + setOverWriteKeepAlive(5, max_requests); + + for (int i = 2; i <= 10*max_requests; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(5, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(((i-1) % max_requests) + 1, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10*max_requests-10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(10*max_requests-10, DB::CurrentThread::getProfileEvents()[metrics.reused]); + 
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} diff --git a/src/Core/Defines.h b/src/Core/Defines.h index a8dd26519c2..f2142bc764d 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -54,6 +54,7 @@ static constexpr auto DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT = 15; static constexpr auto DEFAULT_TCP_KEEP_ALIVE_TIMEOUT = 290; static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT = 30; +static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST = 1000; static constexpr auto DBMS_DEFAULT_PATH = "/var/lib/clickhouse/"; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 7ce94699053..c3114eb0b6f 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -76,7 +76,8 @@ std::unique_ptr getClient( client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); - client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); + client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); client_configuration.endpointOverride = uri.endpoint; client_configuration.s3_use_adaptive_timeouts = config.getBool( diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index 8813c958185..da6214ae477 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -148,6 +148,7 @@ void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeout if (!session.connected()) { session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); + session.setKeepAliveMaxRequests(int(timeouts.http_keep_alive_max_requests)); } } diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 49305f42d85..f497285bd0c 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -35,6 +35,8 @@ struct ConnectionTimeouts Poco::Timespan tcp_keep_alive_timeout = Poco::Timespan(DEFAULT_TCP_KEEP_ALIVE_TIMEOUT, 0); Poco::Timespan http_keep_alive_timeout = Poco::Timespan(DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, 0); + size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; + /// Timeouts for HedgedConnections Poco::Timespan hedged_connection_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 34dc0c1d2bd..8d586223035 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -22,6 +22,8 @@ inline static constexpr uint64_t DEFAULT_EXPIRATION_WINDOW_SECONDS = 120; inline static constexpr uint64_t DEFAULT_CONNECT_TIMEOUT_MS = 1000; inline static constexpr uint64_t DEFAULT_REQUEST_TIMEOUT_MS = 30000; inline static constexpr uint64_t DEFAULT_MAX_CONNECTIONS = 100; +inline static constexpr uint64_t 
DEFAULT_KEEP_ALIVE_TIMEOUT = 5; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_MAX_REQUESTS = 100; /// In GCP metadata service can be accessed via DNS regardless of IPv4 or IPv6. static inline constexpr char GCP_METADATA_SERVICE_ENDPOINT[] = "http://metadata.google.internal"; diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index f568eb5ddb8..a0b35e9b4a9 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -52,6 +52,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration /// See PoolBase::BehaviourOnLimit bool s3_use_adaptive_timeouts = true; size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; + size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; std::function error_report; From dddb0d9f4a83569e9a64952b20acfc95da2cdf24 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 5 Apr 2024 03:02:45 +0200 Subject: [PATCH 23/53] fix http_keep_alive_max_requests set up --- src/IO/ConnectionTimeouts.h | 7 +++++++ src/IO/S3/PocoHTTPClient.cpp | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index f497285bd0c..b86ec44d21c 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -71,6 +71,7 @@ APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(DECLARE_BUILDER_FOR_MEMBER) ConnectionTimeouts & withConnectionTimeout(size_t seconds); ConnectionTimeouts & withConnectionTimeout(Poco::Timespan span); + ConnectionTimeouts & withHTTPKeepAliveMaxRequests(size_t requests); }; /// NOLINTBEGIN(bugprone-macro-parentheses) @@ -116,6 +117,12 @@ inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Time return *this; } +inline ConnectionTimeouts & ConnectionTimeouts::withHTTPKeepAliveMaxRequests(size_t requests) +{ + http_keep_alive_max_requests = requests; + return *this; +} + void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session); diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 150b8146147..de20a712d4c 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -147,7 +147,8 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withTCPKeepAliveTimeout(Poco::Timespan( client_configuration.enableTcpKeepAlive ? 
client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) - .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)); + .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)) + .withHTTPKeepAliveMaxRequests(client_configuration.http_keep_alive_max_requests); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) From cf982cc114ef5b226815360590e2c207516de658 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 5 Apr 2024 05:00:01 +0200 Subject: [PATCH 24/53] remove debug logging --- src/Common/HTTPConnectionPool.cpp | 76 ------------------------------- 1 file changed, 76 deletions(-) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 81c36dcd970..7f99d6a647f 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -193,18 +193,6 @@ public: return total_connections_in_group >= limits.store_limit; } - size_t getStored() const - { - std::lock_guard lock(mutex); - return total_connections_in_group; - } - - size_t getStoreLimit() const - { - std::lock_guard lock(mutex); - return limits.store_limit; - } - void atConnectionCreate() { std::lock_guard lock(mutex); @@ -359,33 +347,12 @@ private: Session::flushRequest(); } - String printAllHeaders(Poco::Net::HTTPMessage & message) const - { - String out; - out.reserve(300); - for (auto & [k, v] : message) - { - out.append(fmt::format("<{}: {}> ", k, v)); - } - return out; - } - std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override { auto idle = idleTime(); std::ostream & result = Session::sendRequest(request); result.exceptions(std::ios::badbit); - // that line is for temporary debug, will be removed - LOG_INFO(log, "Send request to {} with: version {}, method {}, request no {}, keep-alive timeout={}, last usage ago: {}ms, headers: {}", - request.getVersion(), - request.getMethod(), - getTarget(), - Session::getKeepAliveRequest(), - Session::getKeepAliveTimeout().totalSeconds(), - idle.totalMilliseconds(), - printAllHeaders(request)); - request_stream = &result; request_stream_completed = false; @@ -397,22 +364,9 @@ private: std::istream & receiveResponse(Poco::Net::HTTPResponse & response) override { - int originKA = Session::getKeepAliveTimeout().totalSeconds(); - std::istream & result = Session::receiveResponse(response); result.exceptions(std::ios::badbit); - // that line is for temporary debug, will be removed - LOG_INFO(log, "Received response from {} with: version {}, code {}, request no {}, keep alive header: {}, original ka {}, last usage ago: {}ms, headers: {}", - getTarget(), - response.getVersion(), - int(response.getStatus()), - Session::getKeepAliveRequest(), - response.get(Poco::Net::HTTPMessage::CONNECTION_KEEP_ALIVE, Poco::Net::HTTPMessage::EMPTY), - originKA, - idleTime().totalMilliseconds(), - printAllHeaders(response)); - response_stream = &result; response_stream_completed = false; @@ -456,19 +410,8 @@ private: group->atConnectionDestroy(); if (!isExpired) - { if (auto lock = pool.lock()) lock->atConnectionDestroy(*this); - } - else - { - Poco::Timestamp now; - LOG_INFO(log, "Expired connection to {} with: request no {}, keep alive timeout: {}, last usage ago: {}s", - getTarget(), - Session::getKeepAliveRequest(), - Session::getKeepAliveTimeout().totalSeconds(), - idleTime().totalSeconds()); - } CurrentMetrics::sub(metrics.active_count); } @@ -519,8 +462,6 @@ private: IHTTPConnectionPoolForEndpoint::Metrics metrics; bool isExpired = 
false; - size_t exception_level = std::uncaught_exceptions(); - LoggerPtr log = getLogger("PooledConnection"); std::ostream * request_stream = nullptr; @@ -701,9 +642,6 @@ private: { if (connection.getKeepAliveRequest() >= connection.getKeepAliveMaxRequests()) { - LOG_INFO(getLogger("PooledConnection"), "Expired by connection number {}", - connection.getKeepAliveRequest()); - ProfileEvents::increment(getMetrics().expired, 1); return; } @@ -711,19 +649,6 @@ private: if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered() || group->isStoreLimitReached()) { - Poco::Timestamp now; - LOG_INFO(getLogger("PooledConnection"), - "Reset connection to {} with: usage count {}, keep alive timeout: {}, connected {}, must recon {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}, there is exception {}", - getTarget(), - connection.getKeepAliveRequest(), - connection.getKeepAliveTimeout().totalSeconds(), - connection.connected(), - connection.mustReconnect(), - connection.idleTime().totalSeconds(), - connection.isCompleted(), - group->isStoreLimitReached(), group->getStored(), group->getStoreLimit(), - connection.exception_level - std::uncaught_exceptions()); - ProfileEvents::increment(getMetrics().reset, 1); return; } @@ -833,7 +758,6 @@ private: ConnectionGroup::Ptr storage_group = std::make_shared(HTTPConnectionGroupType::STORAGE); ConnectionGroup::Ptr http_group = std::make_shared(HTTPConnectionGroupType::HTTP); - /// If multiple mutexes are held simultaneously, /// they should be locked in this order: /// HTTPConnectionPools::mutex, then EndpointConnectionPool::mutex, then ConnectionGroup::mutex. From 81eda37f7f0a62cd1a4499c56a66daa7ef981827 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Apr 2024 10:27:13 +0200 Subject: [PATCH 25/53] Print correct count --- utils/postprocess-traces/postprocess-traces.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/postprocess-traces/postprocess-traces.pl b/utils/postprocess-traces/postprocess-traces.pl index 3e50f64d864..1c198908580 100755 --- a/utils/postprocess-traces/postprocess-traces.pl +++ b/utils/postprocess-traces/postprocess-traces.pl @@ -13,9 +13,9 @@ sub process_stacktrace my $group = \$grouped_stacks; for my $frame (reverse @current_stack) { + $group = \$$group->{children}{$frame}; $$group->{count} ||= 0; ++$$group->{count}; - $group = \$$group->{children}{$frame}; } @current_stack = (); @@ -47,7 +47,7 @@ sub print_group for my $key (sort { $group->{children}{$b}{count} <=> $group->{children}{$a}{count} } keys %{$group->{children}}) { - my $count = $group->{count}; + my $count = $group->{children}{$key}{count}; print(('| ' x $level) . $count . (' ' x (5 - (length $count))) . $key . 
"\n"); print_group($group->{children}{$key}, $level + 1); } From ce1f5144177c404c955bd006f0428ee932ad49ac Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 5 Apr 2024 10:39:05 +0000 Subject: [PATCH 26/53] Fix optimize_uniq_to_count when only prefix of key is matched --- src/Analyzer/Passes/UniqToCountPass.cpp | 13 +++++++++++-- .../02990_optimize_uniq_to_count_alias.reference | 1 + .../02990_optimize_uniq_to_count_alias.sql | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index d7d11e9a580..b801865c9a5 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -29,7 +29,8 @@ NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) return {}; NamesAndTypes result; - for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) + const auto & group_by_elements = query_node->getGroupByNode()->getChildren(); + for (const auto & group_by_element : group_by_elements) { const auto & projection_columns = query_node->getProjectionColumns(); const auto & projection_nodes = query_node->getProjection().getNodes(); @@ -38,10 +39,18 @@ NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) for (size_t i = 0; i < projection_columns.size(); i++) { - if (projection_nodes[i]->isEqual(*group_by_ele)) + if (projection_nodes[i]->isEqual(*group_by_element)) + { result.push_back(projection_columns[i]); + break; + } } } + /// If some group by keys are not matched, we cannot apply optimization, + /// because prefix of group by keys may not be unique. + if (result.size() != group_by_elements.size()) + return {}; + return result; } diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference index 6ed281c757a..e8183f05f5d 100644 --- a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference @@ -1,2 +1,3 @@ 1 1 +1 diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql index 5ba0be39991..54d19264c45 100644 --- a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql @@ -34,4 +34,19 @@ FROM ) AS t ) SETTINGS optimize_uniq_to_count=1; +-- https://github.com/ClickHouse/ClickHouse/issues/62298 +DROP TABLE IF EXISTS users; +CREATE TABLE users +( + `id` Int64, + `name` String +) +ENGINE = ReplacingMergeTree +ORDER BY (id, name); + +INSERT INTO users VALUES (1, 'pufit'), (1, 'pufit2'), (1, 'pufit3'); + +SELECT uniqExact(id) FROM ( SELECT id FROM users WHERE id = 1 GROUP BY id, name ); + +DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS tags; From 500c3fe0fcb197f7d8b2f0a6148480727015acf1 Mon Sep 17 00:00:00 2001 From: Sean Haynes Date: Fri, 5 Apr 2024 10:38:28 +0000 Subject: [PATCH 27/53] Fix small typo in Dictionary source loader --- src/Interpreters/ExternalLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 36664cbd06f..53e91971d92 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -1186,7 +1186,7 @@ private: else { auto result = std::chrono::system_clock::now() + 
std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count));
-            LOG_TRACE(log, "Supposed update time for unspecified object is {} (backoff, {} errors.", to_string(result), error_count);
+            LOG_TRACE(log, "Supposed update time for unspecified object is {} (backoff, {} errors)", to_string(result), error_count);
             return result;
         }
     }

From e53ba4fa9db4646ee3a0c193594379b33043bcf2 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Fri, 5 Apr 2024 13:32:07 +0000
Subject: [PATCH 28/53] Analyzer: Fix PREWHERE with lambda functions

---
 src/Planner/CollectTableExpressionData.cpp         | 4 +++-
 .../0_stateless/03036_prewhere_lambda_function.reference | 2 ++
 .../0_stateless/03036_prewhere_lambda_function.sql | 8 ++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03036_prewhere_lambda_function.reference
 create mode 100644 tests/queries/0_stateless/03036_prewhere_lambda_function.sql

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 385381f1355..27b5909c13b 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -235,7 +235,9 @@ public:
     static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
     {
         auto child_node_type = child_node->getNodeType();
-        return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
+        return child_node_type != QueryTreeNodeType::QUERY &&
+            child_node_type != QueryTreeNodeType::UNION &&
+            child_node_type != QueryTreeNodeType::LAMBDA;
     }
 
 private:
diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference
new file mode 100644
index 00000000000..470e4427d96
--- /dev/null
+++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference
@@ -0,0 +1,2 @@
+[4,5,6]
+[4,5,6]
diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql
new file mode 100644
index 00000000000..7a5da7ed689
--- /dev/null
+++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql
@@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS t;
+CREATE TABLE t (A Array(Int64)) Engine = MergeTree ORDER BY tuple();
+INSERT INTO t VALUES ([1,2,3]), ([4,5,6]), ([7,8,9]);
+
+SELECT * FROM t PREWHERE arrayExists(x -> x = 5, A);
+SELECT * FROM t PREWHERE arrayExists(lamdba(tuple(x), x = 5), A);
+
+DROP TABLE t;
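The rule this patch encodes is worth spelling out: when the planner collects the columns an expression reads (to decide what PREWHERE may use), it must not descend into lambda bodies, because a lambda parameter such as `x` in `x -> x = 5` is a bound variable, not a table column. A minimal self-contained sketch of that traversal rule, using a toy node type with invented names rather than the real QueryTree API:

    #include <string>
    #include <vector>

    enum class NodeType { QUERY, UNION, LAMBDA, FUNCTION, COLUMN };

    struct Node
    {
        NodeType type;
        std::string name;            // meaningful for COLUMN and FUNCTION nodes
        std::vector<Node> children;
    };

    // Mirrors needChildVisit from the patch above: do not descend into
    // subqueries, unions, or lambda bodies when collecting table columns.
    bool needChildVisit(const Node & child)
    {
        return child.type != NodeType::QUERY
            && child.type != NodeType::UNION
            && child.type != NodeType::LAMBDA;
    }

    void collectColumns(const Node & node, std::vector<std::string> & out)
    {
        if (node.type == NodeType::COLUMN)
            out.push_back(node.name);
        for (const auto & child : node.children)
            if (needChildVisit(child))  // without this check the lambda parameter would be collected too
                collectColumns(child, out);
    }

    int main()
    {
        // arrayExists(x -> x = 5, A): `A` is a table column, `x` is only a lambda parameter.
        Node lambda{NodeType::LAMBDA, "", {{NodeType::COLUMN, "x", {}}}};
        Node call{NodeType::FUNCTION, "arrayExists", {lambda, {NodeType::COLUMN, "A", {}}}};

        std::vector<std::string> columns;
        collectColumns(call, columns);
        return columns.size() == 1 && columns[0] == "A" ? 0 : 1;
    }

Before the fix, the equivalent visitor stopped only at QUERY and UNION nodes, which is presumably how the lambda parameter ended up in the collected set for PREWHERE.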
From 54ceb3d32a7bb490ba7f202a511607f0ea21ae5b Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Fri, 5 Apr 2024 12:47:00 +0000
Subject: [PATCH 29/53] add some comments

---
 src/Processors/QueryPlan/PartsSplitter.cpp         | 2 ++
 .../test_final_bug_with_pk_columns_loading/test.py | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp
index ec51875587e..64af48dd53c 100644
--- a/src/Processors/QueryPlan/PartsSplitter.cpp
+++ b/src/Processors/QueryPlan/PartsSplitter.cpp
@@ -128,6 +128,8 @@ class IndexAccess
 public:
     explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_)
     {
+        /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`)
+        /// and we need to use the same set of index columns across all parts.
         for (const auto & part : parts)
            loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size());
     }
diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py
index e710b9942dc..61559913e05 100644
--- a/tests/integration/test_final_bug_with_pk_columns_loading/test.py
+++ b/tests/integration/test_final_bug_with_pk_columns_loading/test.py
@@ -19,18 +19,24 @@ def start_cluster():
         cluster.shutdown()
 
 
-def test_simple_query_after_restart(start_cluster):
+def test_simple_query_after_index_reload(start_cluster):
     node.query(
         """
        create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1;
 
+        -- for this part the first column is useless, so we have to use both
         insert into t select 42, number from numbers_mt(100);
+
+        -- for this part the first column is enough
         insert into t select number, number from numbers_mt(100);
         """
     )
 
+    # force reloading index
     node.restart_clickhouse()
 
+    # the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. Even a simple count query is enough,
+    # because some granules were assigned to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`)
     assert (
         int(
             node.query(

From 0f4efdaa4788dc5fd9e4ee96ca611eb35d63a29a Mon Sep 17 00:00:00 2001
From: vdimir
Date: Fri, 5 Apr 2024 14:48:39 +0000
Subject: [PATCH 30/53] remove case from 03036_prewhere_lambda_function

---
 .../queries/0_stateless/03036_prewhere_lambda_function.reference | 1 -
 tests/queries/0_stateless/03036_prewhere_lambda_function.sql    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference
index 470e4427d96..2599763b762 100644
--- a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference
+++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference
@@ -1,2 +1 @@
 [4,5,6]
-[4,5,6]
diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql
index 7a5da7ed689..8b9ebb775a3 100644
--- a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql
+++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql
@@ -3,6 +3,5 @@ CREATE TABLE t (A Array(Int64)) Engine = MergeTree ORDER BY tuple();
 INSERT INTO t VALUES ([1,2,3]), ([4,5,6]), ([7,8,9]);
 
 SELECT * FROM t PREWHERE arrayExists(x -> x = 5, A);
-SELECT * FROM t PREWHERE arrayExists(lamdba(tuple(x), x = 5), A);
 
 DROP TABLE t;
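Patch 29 above and the test rework just below both revolve around one invariant in PartsSplitter: when one part has the full (a, b) primary-key index in memory but another loaded only the (a) prefix, every comparison must use the common prefix of loaded key columns, otherwise parts get ordered by incompatible keys. A standalone sketch of the idea, with invented types rather than the real IMergeTreeDataPart API:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using IndexRow = std::vector<unsigned>;  // one value per loaded primary-key column

    // The same clamping idea as in IndexAccess above: every comparison must use
    // the shortest prefix of key columns that all parts managed to load.
    size_t commonLoadedPrefix(const std::vector<IndexRow> & first_rows)
    {
        size_t loaded_columns = first_rows.front().size();
        for (const auto & row : first_rows)
            loaded_columns = std::min(loaded_columns, row.size());
        return loaded_columns;
    }

    bool lessByPrefix(const IndexRow & lhs, const IndexRow & rhs, size_t prefix)
    {
        return std::lexicographical_compare(
            lhs.begin(), lhs.begin() + prefix,
            rhs.begin(), rhs.begin() + prefix);
    }

    int main()
    {
        IndexRow full = {42, 7};    // a part that loaded both key columns (a, b)
        IndexRow truncated = {41};  // a part that skipped the suffix column b

        size_t prefix = commonLoadedPrefix({full, truncated});
        assert(prefix == 1);
        assert(lessByPrefix(truncated, full, prefix));  // ordered by `a` alone, consistently for both parts
        return 0;
    }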
From 39d706ba9f0c8e7f8c8d757e215f639f7d510fe2 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Fri, 5 Apr 2024 14:45:51 +0000
Subject: [PATCH 31/53] rework test

---
 .../__init__.py                                    |  0
 .../test.py                                        | 53 -------------------
 ...s_splitter_bug_and_index_loading.reference      |  1 +
 ...3_parts_splitter_bug_and_index_loading.sql      | 17 ++++++
 4 files changed, 18 insertions(+), 53 deletions(-)
 delete mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/__init__.py
 delete mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/test.py
 create mode 100644 tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference
 create mode 100644 tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql

diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py b/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py
deleted file mode 100644
index 61559913e05..00000000000
--- a/tests/integration/test_final_bug_with_pk_columns_loading/test.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import pytest
-import logging
-
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance("node", stay_alive=True)
-
-
-@pytest.fixture(scope="module")
-def start_cluster():
-    try:
-        logging.info("Starting cluster...")
-        cluster.start()
-        logging.info("Cluster started")
-
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def test_simple_query_after_index_reload(start_cluster):
-    node.query(
-        """
-        create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1;
-
-        -- for this part the first column is useless, so we have to use both
-        insert into t select 42, number from numbers_mt(100);
-
-        -- for this part the first column is enough
-        insert into t select number, number from numbers_mt(100);
-        """
-    )
-
-    # force reloading index
-    node.restart_clickhouse()
-
-    # the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. Even a simple count query is enough,
-    # because some granules were assigned to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`)
-    assert (
-        int(
-            node.query(
-                "select count() from t where not ignore(*)",
-                settings={
-                    "max_threads": 4,
-                    "merge_tree_min_bytes_for_concurrent_read": 1,
-                    "merge_tree_min_rows_for_concurrent_read": 1,
-                    "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": 1,
-                },
-            )
-        )
-        == 200
-    )
diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference
new file mode 100644
index 00000000000..08839f6bb29
--- /dev/null
+++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference
@@ -0,0 +1 @@
+200
diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql
new file mode 100644
index 00000000000..541ac67fd24
--- /dev/null
+++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql
@@ -0,0 +1,17 @@
+create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1;
+
+-- for this part the first column is useless, so we have to use both
+insert into t select 42, number from numbers_mt(100);
+
+-- for this part the first column is enough
+insert into t select number, number from numbers_mt(100);
+
+-- force reloading index
+detach table t;
+attach table t;
+
+set merge_tree_min_bytes_for_concurrent_read=1, merge_tree_min_rows_for_concurrent_read=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=1.0, max_threads=4;
+
+-- the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. Even a simple count query is enough,
+-- because some granules were assigned to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`)
+select count() from t where not ignore(*);
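The next patch turns down the log severity of HTTP request retries. The loop it touches has roughly this shape (a simplified sketch with invented names; the real `doWithRetries` threads through more state): transient failures are logged quietly, and even the final failure stays at debug level, because the exception is rethrown and reported by the caller anyway.

    #include <cstddef>
    #include <exception>
    #include <functional>
    #include <iostream>
    #include <stdexcept>

    // std::cerr stands in for LOG_DEBUG/LOG_TRACE; severity is deliberately low,
    // since the last failure is rethrown and surfaces through the caller.
    void doWithRetries(const std::function<void()> & callable, size_t max_tries)
    {
        for (size_t attempt = 1; attempt <= max_tries; ++attempt)
        {
            try
            {
                callable();
                return;
            }
            catch (const std::exception & e)
            {
                const bool last_attempt = attempt == max_tries;
                std::cerr << (last_attempt ? "[debug] " : "[trace] ")
                          << "Failed to make request. Error: " << e.what()
                          << ". Failed at try " << attempt << "/" << max_tries << ".\n";
                if (last_attempt)
                    throw;  // no ERROR-level duplicate: the caller sees the exception
            }
        }
    }

    int main()
    {
        size_t calls = 0;
        try
        {
            doWithRetries([&] { ++calls; throw std::runtime_error("connection reset"); }, 3);
        }
        catch (const std::exception &)
        {
            // rethrown after the final attempt
        }
        return calls == 3 ? 0 : 1;
    }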
From b2bcfaf344047f629879143d6bb4efa00c22f7cb Mon Sep 17 00:00:00 2001
From: Alexander Gololobov
Date: Fri, 5 Apr 2024 17:18:22 +0200
Subject: [PATCH 32/53] Reduce log levels for ReadWriteBufferFromHTTP retries

---
 src/IO/ReadWriteBufferFromHTTP.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp
index c99b08d0c9d..303ffb744b5 100644
--- a/src/IO/ReadWriteBufferFromHTTP.cpp
+++ b/src/IO/ReadWriteBufferFromHTTP.cpp
@@ -345,7 +345,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable,
             if (last_attempt || !is_retriable)
             {
                 if (!mute_logging)
-                    LOG_ERROR(log,
+                    LOG_DEBUG(log,
                         "Failed to make request to '{}'{}. "
                         "Error: '{}'. "
                         "Failed at try {}/{}.",
@@ -361,7 +361,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable,
                 on_retry();
 
                 if (!mute_logging)
-                    LOG_INFO(log,
+                    LOG_TRACE(log,
                         "Failed to make request to '{}'{}. "
                         "Error: {}. "
                         "Failed at try {}/{}. "

From 6e413223c2560007bab6422117e4d284c3aefdd4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Fri, 5 Apr 2024 09:47:24 +0200
Subject: [PATCH 33/53] Use DETACHED_DIR_NAME everywhere

Signed-off-by: Azat Khuzhin

---
 .../MergeTree/DataPartStorageOnDiskBase.cpp | 10 ++++---
 src/Storages/MergeTree/DataPartsExchange.cpp   |  4 +--
 src/Storages/MergeTree/IMergeTreeDataPart.cpp  |  4 +--
 src/Storages/MergeTree/MergeTreeData.cpp       | 27 +++++++++----------
 src/Storages/StorageMergeTree.cpp              |  2 +-
 src/Storages/StorageReplicatedMergeTree.cpp    | 16 +++++------
 6 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index 18e4c87b298..052e3ba4b74 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -1,3 +1,4 @@
+#include 
 #include 
 #include 
 #include 
@@ -13,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 namespace DB
@@ -64,7 +66,7 @@ std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Logger
     auto full_relative_path = fs::path(root_path);
 
     if (detached)
-        full_relative_path /= "detached";
+        full_relative_path /= MergeTreeData::DETACHED_DIR_NAME;
 
     std::optional original_checksums_content;
     std::optional original_files_list;
@@ -109,7 +111,7 @@ bool DataPartStorageOnDiskBase::looksLikeBrokenDetachedPartHasTheSameContent(con
     if (!exists("checksums.txt"))
         return false;
 
-    auto storage_from_detached = create(volume, fs::path(root_path) / "detached", detached_part_path, /*initialize=*/ true);
+    auto storage_from_detached = create(volume, fs::path(root_path) / MergeTreeData::DETACHED_DIR_NAME, detached_part_path, /*initialize=*/ true);
     if (!storage_from_detached->exists("checksums.txt"))
         return false;
 
@@ -490,7 +492,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze(
     auto single_disk_volume = std::make_shared(disk->getName(), disk, 0);
 
     /// Do not initialize storage in case of DETACH because part may be broken.
- bool to_detached = dir_path.starts_with("detached/"); + bool to_detached = dir_path.starts_with(std::string_view((fs::path(MergeTreeData::DETACHED_DIR_NAME) / "").string())); return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !params.external_transaction); } @@ -618,7 +620,7 @@ void DataPartStorageOnDiskBase::remove( if (part_dir_without_slash.has_parent_path()) { auto parent_path = part_dir_without_slash.parent_path(); - if (parent_path == "detached") + if (parent_path == MergeTreeData::DETACHED_DIR_NAME) throw Exception( ErrorCodes::LOGICAL_ERROR, "Trying to remove detached part {} with path {} in remove function. It shouldn't happen", diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 91444d76a52..cf7889c0aee 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include @@ -803,7 +803,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( throw Exception(ErrorCodes::LOGICAL_ERROR, "`tmp_prefix` and `part_name` cannot be empty or contain '.' or '/' characters."); auto part_dir = tmp_prefix + part_name; - auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); + auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? MergeTreeData::DETACHED_DIR_NAME : ""); auto volume = std::make_shared("volume_" + part_name, disk); /// Create temporary part storage to write sent files. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 8da46b39801..441437855ab 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1844,7 +1844,7 @@ try } catch (...) { - if (startsWith(new_relative_path, "detached/")) + if (startsWith(new_relative_path, fs::path(MergeTreeData::DETACHED_DIR_NAME) / "")) { // Don't throw when the destination is to the detached folder. 
It might be able to // recover in some cases, such as fetching parts into multi-disks while some of the @@ -1957,7 +1957,7 @@ std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const S DetachedPartInfo::DETACH_REASONS.end(), prefix) != DetachedPartInfo::DETACH_REASONS.end()); if (auto path = getRelativePathForPrefix(prefix, /* detached */ true, broken)) - return "detached/" + *path; + return fs::path(MergeTreeData::DETACHED_DIR_NAME) / *path; return {}; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 461d9a31eaa..dc15b8ab940 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -262,7 +262,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (need_create_directories) { disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME); } if (disk->exists(format_version_path)) @@ -1713,7 +1713,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME - || it->name() == MergeTreeData::DETACHED_DIR_NAME) + || it->name() == DETACHED_DIR_NAME) continue; if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version)) @@ -2796,7 +2796,7 @@ void MergeTreeData::dropAllData() && settings_ptr->allow_remote_fs_zero_copy_replication; try { - bool keep_shared = removeDetachedPart(part.disk, fs::path(relative_data_path) / "detached" / part.dir_name / "", part.dir_name); + bool keep_shared = removeDetachedPart(part.disk, fs::path(relative_data_path) / DETACHED_DIR_NAME / part.dir_name / "", part.dir_name); LOG_DEBUG(log, "Dropped detached part {}, keep shared data: {}", part.dir_name, keep_shared); } catch (...) @@ -2879,8 +2879,8 @@ void MergeTreeData::dropIfEmpty() if (disk->isBroken()) continue; /// Non recursive, exception is thrown if there are more files. - disk->removeFileIfExists(fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME); - disk->removeDirectory(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->removeFileIfExists(fs::path(relative_data_path) / FORMAT_VERSION_FILE_NAME); + disk->removeDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME); disk->removeDirectory(relative_data_path); } } @@ -3443,7 +3443,7 @@ void MergeTreeData::changeSettings( { auto disk = new_storage_policy->getDiskByName(disk_name); disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME); } /// FIXME how would that be done while reloading configuration??? @@ -6037,7 +6037,7 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const for (const auto & disk : getDisks()) { - String detached_path = fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME; + String detached_path = fs::path(relative_data_path) / DETACHED_DIR_NAME; /// Note: we don't care about TOCTOU issue here. 
if (disk->exists(detached_path)) @@ -6063,7 +6063,7 @@ void MergeTreeData::validateDetachedPartName(const String & name) void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr local_context) { - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); if (part) { @@ -6088,7 +6088,7 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) { - bool keep_shared = removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + bool keep_shared = removeDetachedPart(disk, fs::path(relative_data_path) / DETACHED_DIR_NAME / new_name / "", old_name); LOG_DEBUG(log, "Dropped detached part {}, keep shared data: {}", old_name, keep_shared); old_name.clear(); } @@ -6097,14 +6097,14 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, ContextPtr local_context, PartsTemporaryRename & renamed_parts) { - const String source_dir = "detached/"; + const fs::path source_dir = DETACHED_DIR_NAME; /// Let's compose a list of parts that should be added. if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + if (temporary_parts.contains(source_dir / part_id)) { LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " "probably it's being detached right now", part_id); @@ -6181,7 +6181,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const LOG_DEBUG(log, "Checking part {}", new_name); auto single_disk_volume = std::make_shared("volume_" + old_name, disk); - auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir + new_name) + auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir / new_name) .withPartFormatFromDisk() .build(); @@ -7212,11 +7212,10 @@ String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const DiskPtr MergeTreeData::tryGetDiskForDetachedPart(const String & part_name) const { - String additional_path = "detached/"; const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) - if (disk->exists(fs::path(relative_data_path) / additional_path / part_name)) + if (disk->exists(fs::path(relative_data_path) / DETACHED_DIR_NAME / part_name)) return disk; return nullptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c9f451b6bb1..6861b615cd6 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2024,7 +2024,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( bool attach_part, ContextPtr local_context) { PartitionCommandsResultInfo results; - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, local_context, renamed_parts); for (size_t i = 0; i < loaded_parts.size(); ++i) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6ab56ba141c..73354e71e71 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1983,7 +1983,7 @@ 
MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { - for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / "detached/"); it->isValid(); it->next()) + for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME); it->isValid(); it->next()) { const auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version); @@ -1993,7 +1993,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo const auto part_old_name = part_info->getPartNameV1(); const auto volume = std::make_shared("volume_" + part_old_name, disk); - auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path("detached") / part_old_name) + auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path(DETACHED_DIR_NAME) / part_old_name) .withPartFormatFromDisk() .build(); @@ -2440,7 +2440,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); LOG_INFO(log, "Detaching {}", part_dir); - auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + auto holder = getTemporaryPartDirectoryHolder(fs::path(DETACHED_DIR_NAME) / part_dir); part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -2967,7 +2967,7 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr part = get_part(); // The fetched part is valuable and should not be cleaned like a temp part. part->is_temp = false; - part->renameTo("detached/" + entry.new_part_name, true); + part->renameTo(fs::path(DETACHED_DIR_NAME) / entry.new_part_name, true); LOG_INFO(log, "Cloned part {} to detached directory", part->name); } @@ -4987,7 +4987,7 @@ bool StorageReplicatedMergeTree::fetchPart( { // The fetched part is valuable and should not be cleaned like a temp part. part->is_temp = false; - part->renameTo(fs::path("detached") / part_name, true); + part->renameTo(fs::path(DETACHED_DIR_NAME) / part_name, true); } } catch (const Exception & e) @@ -6547,7 +6547,7 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( assertNotReadonly(); PartitionCommandsResultInfo results; - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. 
@@ -9986,7 +9986,7 @@ bool StorageReplicatedMergeTree::checkIfDetachedPartExists(const String & part_n
 {
     fs::directory_iterator dir_end;
     for (const std::string & path : getDataPaths())
-        for (fs::directory_iterator dir_it{fs::path(path) / "detached/"}; dir_it != dir_end; ++dir_it)
+        for (fs::directory_iterator dir_it{fs::path(path) / DETACHED_DIR_NAME}; dir_it != dir_end; ++dir_it)
             if (dir_it->path().filename().string() == part_name)
                 return true;
     return false;
@@ -9999,7 +9999,7 @@ bool StorageReplicatedMergeTree::checkIfDetachedPartitionExists(const String & p
 
     for (const std::string & path : getDataPaths())
     {
-        for (fs::directory_iterator dir_it{fs::path(path) / "detached/"}; dir_it != dir_end; ++dir_it)
+        for (fs::directory_iterator dir_it{fs::path(path) / DETACHED_DIR_NAME}; dir_it != dir_end; ++dir_it)
         {
            const String file_name = dir_it->path().filename().string();
             auto part_info = MergeTreePartInfo::tryParsePartName(file_name, format_version);

From 378d330d9dfa289c413f80c2addaf6dee5503093 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Fri, 5 Apr 2024 17:07:43 +0000
Subject: [PATCH 34/53] better

---
 .../0_stateless/03033_parts_splitter_bug_and_index_loading.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql
index 541ac67fd24..25ec1c8fd80 100644
--- a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql
+++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql
@@ -1,5 +1,7 @@
 create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1;
 
+system stop merges t;
+
 -- for this part the first column is useless, so we have to use both
 insert into t select 42, number from numbers_mt(100);
 
From 0bce544779bd881aa3218694545fe5a8017ee9a4 Mon Sep 17 00:00:00 2001
From: Sema Checherinda <104093494+CheSema@users.noreply.github.com>
Date: Fri, 5 Apr 2024 23:07:00 +0200
Subject: [PATCH 35/53] Update base/poco/Net/src/HTTPClientSession.cpp

Co-authored-by: Nikita Taranov

---
 base/poco/Net/src/HTTPClientSession.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp
index e489ab56b98..c9899266be7 100644
--- a/base/poco/Net/src/HTTPClientSession.cpp
+++ b/base/poco/Net/src/HTTPClientSession.cpp
@@ -248,7 +248,7 @@ void HTTPClientSession::setKeepAliveRequest(int request)
 
 
- void HTTPClientSession::setLastRequest(Poco::Timestamp time)
+void HTTPClientSession::setLastRequest(Poco::Timestamp time)
 {
     if (connected())
     {

From f766ec678206c0b0e5f0eac0d142583fa47d89cd Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Fri, 5 Apr 2024 23:19:30 +0200
Subject: [PATCH 36/53] review remarks

---
 src/Common/HTTPConnectionPool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp
index 7f99d6a647f..167aeee68f3 100644
--- a/src/Common/HTTPConnectionPool.cpp
+++ b/src/Common/HTTPConnectionPool.cpp
@@ -213,7 +213,8 @@ public:
 
         --total_connections_in_group;
 
-        const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 20 : 1;
+        const size_t gap = 20;
+        const size_t reduced_warning_limit = limits.warning_limit > gap ?
limits.warning_limit - gap : 1; if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit) { LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group); @@ -289,8 +290,7 @@ private: auto timeouts = getTimeouts(*this); auto new_connection = lock->getConnection(timeouts); Session::assign(*new_connection); - if (Session::getKeepAliveRequest() == 0) - Session::setKeepAliveRequest(1); + Session::setKeepAliveRequest(Session::getKeepAliveRequest() + 1); } else { @@ -425,7 +425,7 @@ private: ConnectionGroup::Ptr group_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args) - : Session(args...) + : Session(std::forward(args)...) , pool(std::move(pool_)) , group(group_) , metrics(std::move(metrics_)) From 664823463b23d00d2aa4293bdea763112b652ddb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2024 21:46:43 +0200 Subject: [PATCH 37/53] Do not create a directory for UDF in clickhouse-client if it does not exist --- .../UserDefined/UserDefinedSQLObjectsDiskStorage.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index b083c540083..d874612ad04 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -56,7 +56,6 @@ UserDefinedSQLObjectsDiskStorage::UserDefinedSQLObjectsDiskStorage(const Context , dir_path{makeDirectoryPathCanonical(dir_path_)} , log{getLogger("UserDefinedSQLObjectsLoaderFromDisk")} { - createDirectory(); } @@ -122,7 +121,12 @@ void UserDefinedSQLObjectsDiskStorage::reloadObjects() void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() { LOG_INFO(log, "Loading user defined objects from {}", dir_path); - createDirectory(); + + if (!std::filesystem::exists(dir_path)) + { + LOG_DEBUG(log, "The directory for user defined objects ({}) does not exist: nothing to load", dir_path); + return; + } std::vector> function_names_and_queries; @@ -157,7 +161,6 @@ void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() void UserDefinedSQLObjectsDiskStorage::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) { - createDirectory(); auto ast = tryLoadObject(object_type, object_name); if (ast) setObject(object_name, *ast); @@ -185,6 +188,7 @@ bool UserDefinedSQLObjectsDiskStorage::storeObjectImpl( bool replace_if_exists, const Settings & settings) { + createDirectory(); String file_path = getFilePath(object_type, object_name); LOG_DEBUG(log, "Storing user-defined object {} to file {}", backQuote(object_name), file_path); From c5e47bbe70e232188e36d0599e29605db4905861 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2024 21:52:04 +0200 Subject: [PATCH 38/53] Add a test --- .../03033_analyzer_query_parameters.sh | 4 ++-- ...udf_user_defined_directory_in_client.reference | 1 + .../03036_udf_user_defined_directory_in_client.sh | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference create mode 100755 tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh index c821791e437..cf46067df99 100755 --- a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh +++ 
b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" -clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" +${CLICKHOUSE_LOCAL} --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" +${CLICKHOUSE_LOCAL} --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" diff --git a/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference new file mode 100644 index 00000000000..251d054748a --- /dev/null +++ b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference @@ -0,0 +1 @@ +Unknown function diff --git a/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh new file mode 100755 index 00000000000..e0a145d8456 --- /dev/null +++ b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test (s String) ENGINE = Memory" + +# Calling an unknown function should not lead to creation of a 'user_defined' directory in the current directory +${CLICKHOUSE_CLIENT} --query "INSERT INTO test VALUES (xyz('abc'))" 2>&1 | grep -o -F 'Unknown function' + +ls -ld user_defined 2> /dev/null + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test" From f5e9a09d69ea0d1f961464e866c77a73c5c0e82e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 05:20:09 +0200 Subject: [PATCH 39/53] Remove support for INSERT WATCH query --- src/Client/ClientBase.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 14 +++----------- src/Interpreters/executeQuery.cpp | 11 ----------- src/Parsers/ASTInsertQuery.cpp | 7 +------ src/Parsers/ASTInsertQuery.h | 2 -- src/Parsers/ParserInsertQuery.cpp | 15 +-------------- 6 files changed, 6 insertions(+), 45 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7a3192d1d9c..8107bd94394 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1964,7 +1964,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. 
- if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert_with_inlined_data) + if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) { if (input_function && insert->format.empty()) throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fc58f7b5098..35ff65c2335 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -340,13 +340,10 @@ bool InterpreterInsertQuery::shouldAddSquashingFroStorage(const StoragePtr & tab { auto context_ptr = getContext(); const Settings & settings = context_ptr->getSettingsRef(); - const ASTInsertQuery * query = nullptr; - if (query_ptr) - query = query_ptr->as(); /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). - return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash && !(query && query->watch); + return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash; } Chain InterpreterInsertQuery::buildPreSinkChain( @@ -429,7 +426,7 @@ BlockIO InterpreterInsertQuery::execute() std::vector presink_chains; std::vector sink_chains; - if (!distributed_pipeline || query.watch) + if (!distributed_pipeline) { /// Number of streams works like this: /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever @@ -560,11 +557,6 @@ BlockIO InterpreterInsertQuery::execute() } } } - else if (query.watch) - { - InterpreterWatchQuery interpreter_watch{ query.watch, getContext() }; - pipeline = interpreter_watch.buildQueryPipeline(); - } ThreadGroupPtr running_group; if (current_thread) @@ -591,7 +583,7 @@ BlockIO InterpreterInsertQuery::execute() { res.pipeline = std::move(*distributed_pipeline); } - else if (query.select || query.watch) + else if (query.select) { const auto & header = presink_chains.at(0).getInputHeader(); auto actions_dag = ActionsDAG::makeConvertingActions( diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ea2f69bd2b1..96a9c8d8c8e 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -644,15 +644,6 @@ void logExceptionBeforeStart( } } -static void setQuerySpecificSettings(ASTPtr & ast, ContextMutablePtr context) -{ - if (auto * ast_insert_into = ast->as()) - { - if (ast_insert_into->watch) - context->setSetting("output_format_enable_streaming", 1); - } -} - void validateAnalyzerSettings(ASTPtr ast, bool context_value) { if (ast->as()) @@ -898,8 +889,6 @@ static std::tuple executeQueryImpl( if (auto * insert_query = ast->as()) insert_query->tail = istr; - setQuerySpecificSettings(ast, context); - /// There is an option of probabilistic logging of queries. /// If it is used - do the random sampling and "collapse" the settings. 
/// It allows to consistently log queries with all the subqueries in distributed query processing diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index 72a569fe047..8e3458539f3 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -123,13 +123,8 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << delim; select->formatImpl(settings, state, frame); } - else if (watch) - { - settings.ostr << delim; - watch->formatImpl(settings, state, frame); - } - if (!select && !watch) + if (!select) { if (!format.empty()) { diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index b0f444ed755..aeab0f148be 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -24,7 +24,6 @@ public: ASTPtr settings_ast; ASTPtr select; - ASTPtr watch; ASTPtr infile; ASTPtr compression; @@ -63,7 +62,6 @@ public: if (partition_by) { res->partition_by = partition_by->clone(); res->children.push_back(res->partition_by); } if (settings_ast) { res->settings_ast = settings_ast->clone(); res->children.push_back(res->settings_ast); } if (select) { res->select = select->clone(); res->children.push_back(res->select); } - if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } if (infile) { res->infile = infile->clone(); res->children.push_back(res->infile); } if (compression) { res->compression = compression->clone(); res->children.push_back(res->compression); } diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index d1171dd4815..9373e6a1c93 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -36,7 +36,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_format(Keyword::FORMAT); ParserKeyword s_settings(Keyword::SETTINGS); ParserKeyword s_select(Keyword::SELECT); - ParserKeyword s_watch(Keyword::WATCH); ParserKeyword s_partition_by(Keyword::PARTITION_BY); ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); @@ -56,7 +55,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr columns; ASTPtr format; ASTPtr select; - ASTPtr watch; ASTPtr table_function; ASTPtr settings_ast; ASTPtr partition_by_expr; @@ -143,7 +141,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String format_str; Pos before_values = pos; - /// VALUES or FORMAT or SELECT or WITH or WATCH. + /// VALUES or FORMAT or SELECT or WITH. /// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing. if (!infile && s_values.ignore(pos, expected)) { @@ -175,14 +173,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tryGetIdentifierNameInto(format, format_str); } - else if (!infile && s_watch.ignore(pos, expected)) - { - /// If WATCH is defined, return to position before WATCH and parse - /// rest of query as WATCH query. - pos = before_values; - ParserWatchQuery watch_p; - watch_p.parse(pos, watch, expected); - } else if (!infile) { /// If all previous conditions were false and it's not FROM INFILE, query is incorrect @@ -286,7 +276,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->columns = columns; query->format = std::move(format_str); query->select = select; - query->watch = watch; query->settings_ast = settings_ast; query->data = data != end ? 
data : nullptr; query->end = end; @@ -295,8 +284,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->children.push_back(columns); if (select) query->children.push_back(select); - if (watch) - query->children.push_back(watch); if (settings_ast) query->children.push_back(settings_ast); From 064acacd93a7de86cd66bf551905b9ff365a9eef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 12:43:10 +0200 Subject: [PATCH 40/53] Update test --- .../0_stateless/02263_format_insert_settings.reference | 10 ---------- .../0_stateless/02263_format_insert_settings.sh | 2 -- 2 files changed, 12 deletions(-) diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index 2bba75f6788..ea8b78faf8c 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -21,10 +21,6 @@ INSERT INTO foo FORMAT Values INSERT INTO foo SELECT 1 [oneline] insert into foo select 1 INSERT INTO foo SELECT 1 -[multi] insert into foo watch bar -INSERT INTO foo WATCH bar -[oneline] insert into foo watch bar -INSERT INTO foo WATCH bar [multi] insert into foo format tsv INSERT INTO foo FORMAT tsv [oneline] insert into foo format tsv @@ -41,12 +37,6 @@ SETTINGS max_threads = 1 SELECT 1 [oneline] insert into foo settings max_threads=1 select 1 INSERT INTO foo SETTINGS max_threads = 1 SELECT 1 -[multi] insert into foo settings max_threads=1 watch bar -INSERT INTO foo -SETTINGS max_threads = 1 -WATCH bar -[oneline] insert into foo settings max_threads=1 watch bar -INSERT INTO foo SETTINGS max_threads = 1 WATCH bar [multi] insert into foo settings max_threads=1 format tsv INSERT INTO foo SETTINGS max_threads = 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 49aa56d6c0a..808ab23ee59 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -40,12 +40,10 @@ $CLICKHOUSE_CLIENT -q 'drop table data_02263' run_format_both 'insert into foo values' run_format_both 'insert into foo select 1' -run_format_both 'insert into foo watch bar' run_format_both 'insert into foo format tsv' run_format_both 'insert into foo settings max_threads=1 values' run_format_both 'insert into foo settings max_threads=1 select 1' -run_format_both 'insert into foo settings max_threads=1 watch bar' run_format_both 'insert into foo settings max_threads=1 format tsv' run_format_both 'insert into foo select 1 settings max_threads=1' run_format_both 'insert into foo settings max_threads=1 select 1 settings max_threads=1' From 444016fb3ee585cae98df9f16285b9c0fff6577a Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 11:20:30 +0000 Subject: [PATCH 41/53] CI: fix unittest issue --- tests/ci/ci.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 36e9b183805..f60c40f5860 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -773,6 +773,7 @@ class CiOptions: not pr_info.is_pr() and not debug_message ): # if commit_message is provided it's test/debug scenario - do not return # CI options can be configured in PRs only + # if debug_message is provided - it's a test return res message = debug_message or GitRunner(set_cwd_to_git_root=True).run( f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" @@ -790,9 +791,9 @@ 
From 094f94882c972a798ace493ca9a7019255a64f7b Mon Sep 17 00:00:00 2001
From: Ilya Andreev <18560147+andreev-io@users.noreply.github.com>
Date: Mon, 8 Apr 2024 11:35:03 +0100
Subject: [PATCH 42/53] Fix a typo in the documentation of the ALTER TABLE ... MODIFY QUERY statement

---
 docs/en/sql-reference/statements/alter/view.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md
index 59045afdeb6..e063b27424e 100644
--- a/docs/en/sql-reference/statements/alter/view.md
+++ b/docs/en/sql-reference/statements/alter/view.md
@@ -8,7 +8,7 @@ sidebar_label: VIEW
 
 You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
 
-This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause.
+This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause.
 
 **Example with TO table**
 

From 83d1f1a8769d3be8d78f48db82873b9438ac87f4 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Mon, 8 Apr 2024 11:51:59 +0000
Subject: [PATCH 43/53] CI: fix for docs only pr

---
 tests/ci/ci.py      | 7 ++++---
 tests/ci/pr_info.py | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index 36e9b183805..f60c40f5860 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -773,6 +773,7 @@ class CiOptions:
             not pr_info.is_pr() and not debug_message
         ):  # if commit_message is provided it's test/debug scenario - do not return
             # CI options can be configured in PRs only
+            # if debug_message is provided - it's a test
             return res
         message = debug_message or GitRunner(set_cwd_to_git_root=True).run(
             f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1"
@@ -790,9 +791,9 @@ class CiOptions:
         print(f"CI tags from PR body: [{matches_pr}]")
         matches = list(set(matches + matches_pr))
 
-    if "do not test" in pr_info.labels:
-        # do_not_test could be set in GH labels
-        res.do_not_test = True
+        if "do not test" in pr_info.labels:
+            # do_not_test could be set in GH labels
+            res.do_not_test = True
 
         for match in matches:
             if match.startswith("job_"):
diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py
index ddf59c49e1f..204284785c9 100644
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@@ -26,6 +26,7 @@ NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]]
 DIFF_IN_DOCUMENTATION_EXT = [
     ".html",
     ".md",
+    ".mdx",
     ".yml",
     ".txt",
     ".css",

From e4b0ca5d836e14fada7592777a0443914bfbaa47 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 8 Apr 2024 12:59:20 +0000
Subject: [PATCH 44/53] Fix filter pushdown from additional_table_filters in Merge engine in analyzer

---
 src/Planner/PlannerJoinTree.cpp                            | 3 ++-
 src/Storages/StorageDummy.h                                | 6 ++++++
 ...03033_analyzer_merge_engine_filter_push_down.reference  | 3 +++
 .../03033_analyzer_merge_engine_filter_push_down.sql       | 8 ++++++++
 4 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference
 create mode 100644 tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index d2f37ff1ad4..534080f1739 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -814,7 +814,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
         bool optimize_move_to_prewhere
             = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final);
 
-        if (storage->supportsPrewhere() && optimize_move_to_prewhere)
+        auto supported_prewhere_columns = storage->supportedPrewhereColumns();
+        if (storage->canMoveConditionsToPrewhere() && optimize_move_to_prewhere && (!supported_prewhere_columns || supported_prewhere_columns->contains(filter_info.column_name)))
         {
             if (!prewhere_info)
                 prewhere_info = std::make_shared<PrewhereInfo>();
diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h
index e9d8f90f755..ae9bf2483e1 100644
--- a/src/Storages/StorageDummy.h
+++ b/src/Storages/StorageDummy.h
@@ -19,6 +19,12 @@ public:
     bool supportsSampling() const override { return true; }
     bool supportsFinal() const override { return true; }
     bool supportsPrewhere() const override { return true; }
+
+    std::optional<NameSet> supportedPrewhereColumns() const override
+    {
+        return original_storage_snapshot ? original_storage_snapshot->storage.supportedPrewhereColumns() : std::nullopt;
+    }
+
     bool supportsSubcolumns() const override { return true; }
     bool supportsDynamicSubcolumns() const override { return true; }
     bool canMoveConditionsToPrewhere() const override
diff --git a/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference
new file mode 100644
index 00000000000..86a00059854
--- /dev/null
+++ b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference
@@ -0,0 +1,3 @@
+UInt32 1
+UInt32 2
+UInt32 3
diff --git a/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql
new file mode 100644
index 00000000000..9be1152bbbf
--- /dev/null
+++ b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql
@@ -0,0 +1,8 @@
+set allow_suspicious_low_cardinality_types=1;
+drop table if exists test;
+create table test (`x` LowCardinality(Nullable(UInt32)), `y` String) engine = MergeTree order by tuple();
+insert into test values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+create table m_table (x UInt32, y String) engine = Merge(currentDatabase(), 'test*');
+select toTypeName(x), x FROM m_table SETTINGS additional_table_filters = {'m_table':'x != 4'}, optimize_move_to_prewhere=1, allow_experimental_analyzer=1;
+drop table test;
+
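The planner change narrows when a filter from `additional_table_filters` may be moved into PREWHERE: the storage must allow the move at all (`canMoveConditionsToPrewhere`), and if it restricts PREWHERE to certain columns (`supportedPrewhereColumns`, as `Merge` tables do), the filter column must be among them. The condition, restated as a small Python model (simplified types; the real code operates on ClickHouse planner internals):

```python
# Python model of the pushdown guard added in PlannerJoinTree.cpp above.
from typing import Optional, Set

def can_push_to_prewhere(
    can_move: bool,
    optimize_move_to_prewhere: bool,
    supported_prewhere_columns: Optional[Set[str]],
    filter_column: str,
) -> bool:
    # None means "no restriction": any column may appear in PREWHERE.
    column_ok = (
        supported_prewhere_columns is None
        or filter_column in supported_prewhere_columns
    )
    return can_move and optimize_move_to_prewhere and column_ok

# A Merge table restricts PREWHERE to columns all children support, so a
# filter on an unsupported column now stays in WHERE:
assert not can_push_to_prewhere(True, True, {"y"}, "x")
assert can_push_to_prewhere(True, True, None, "x")
```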
From 5e1c1b6b94d920fbfb361c0cf606728f730e149a Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Mon, 8 Apr 2024 13:41:44 +0000
Subject: [PATCH 45/53] CI: test merge queue

---
 tests/ci/ci.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index f60c40f5860..c2962c5b40e 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -318,7 +318,7 @@ class CiCache:
             self.update()
 
         if self.cache_data_fetched:
-            # there are no record w/o underling data - no need to fetch
+            # there are no records without fetched data - no need to fetch
             return self
 
         # clean up

From f3dc77ee00008f640b4d1f47445a223bbe286000 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Mon, 8 Apr 2024 14:15:49 +0000
Subject: [PATCH 46/53] disable autofix for merge queue

---
 tests/ci/style_check.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py
index 373fa7b316f..4580f007606 100644
--- a/tests/ci/style_check.py
+++ b/tests/ci/style_check.py
@@ -131,6 +131,11 @@ def main():
     temp_path.mkdir(parents=True, exist_ok=True)
 
     pr_info = PRInfo()
+
+    if pr_info.is_merge_queue() and args.push:
+        print("Auto style fix will be disabled for Merge Queue workflow")
+        args.push = False
+
     run_cpp_check = True
     run_shell_check = True
     run_python_check = True

From 603824748d7bec40d8dc7b30a33a988e214c3328 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Mon, 8 Apr 2024 15:03:13 +0000
Subject: [PATCH 47/53] CI: disable finish check for mq

---
 .github/workflows/pull_request.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index ff0adee1443..74ce8452de8 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -157,7 +157,7 @@ jobs:
   ################################# Stage Final #################################
   #
   FinishCheck:
-    if: ${{ !failure() && !cancelled() }}
+    if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }}
     needs: [Tests_1, Tests_2]
     runs-on: [self-hosted, style-checker]
     steps:
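PATCH 47 keeps `FinishCheck` off merge-queue runs by extending the job-level `if:` expression. `merge_group` is GitHub's real event name for merge-queue triggers; the function below is only a Python model of the Actions expression, not code from the repository:

```python
# Model of the workflow gate added in pull_request.yml above.
def should_run_finish_check(event_name: str, failed: bool, cancelled: bool) -> bool:
    # Mirrors: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }}
    return not failed and not cancelled and event_name != "merge_group"

assert should_run_finish_check("pull_request", False, False)
assert not should_run_finish_check("merge_group", False, False)
```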
From baa62cdaeeb23aba770efe6368bba6ec97cf6214 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Mon, 8 Apr 2024 16:09:47 +0000
Subject: [PATCH 48/53] CI: no CI Running status for MQ

---
 tests/ci/run_check.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index 6187656983e..435a5f726f2 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -201,14 +201,17 @@ def main():
 
     ci_report_url = create_ci_report(pr_info, [])
     print("::notice ::Can run")
-    post_commit_status(
-        commit,
-        PENDING,
-        ci_report_url,
-        description,
-        CI_STATUS_NAME,
-        pr_info,
-    )
+
+    if not pr_info.is_merge_queue():
+        # we need clean CI status for MQ to merge (no pending statuses)
+        post_commit_status(
+            commit,
+            PENDING,
+            ci_report_url,
+            description,
+            CI_STATUS_NAME,
+            pr_info,
+        )
 
 
 if __name__ == "__main__":

From 39c6188a2c0b7014136e1d9d9f16c684741fb0cb Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 8 Apr 2024 16:38:19 +0000
Subject: [PATCH 49/53] Fix logical error 'numbers_storage.step != UInt64{0}'

---
 src/TableFunctions/TableFunctionNumbers.cpp                   | 4 ++++
 .../03037_zero_step_in_numbers_table_function.reference       | 0
 .../0_stateless/03037_zero_step_in_numbers_table_function.sql | 2 ++
 3 files changed, 6 insertions(+)
 create mode 100644 tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference
 create mode 100644 tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql

diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp
index 2989eb5fbef..16f56eab981 100644
--- a/src/TableFunctions/TableFunctionNumbers.cpp
+++ b/src/TableFunctions/TableFunctionNumbers.cpp
@@ -20,6 +20,7 @@ namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int BAD_ARGUMENTS;
 }
 
 namespace
@@ -78,6 +79,9 @@ StoragePtr TableFunctionNumbers<multithreaded>::executeImpl(
         UInt64 length = arguments.size() >= 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]);
         UInt64 step = arguments.size() == 3 ? evaluateArgument(context, arguments[2]) : 1;
 
+        if (!step)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function {} requires step to be a positive number", getName());
+
         auto res = std::make_shared<StorageSystemNumbers>(
             StorageID(getDatabaseName(), table_name), multithreaded, std::string{"number"}, length, offset, step);
         res->startup();
diff --git a/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql
new file mode 100644
index 00000000000..08fafd6ddfa
--- /dev/null
+++ b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql
@@ -0,0 +1,2 @@
+select * from numbers(1, 10, 0); -- {serverError BAD_ARGUMENTS}
+
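The new check turns a zero step in `numbers(offset, length, step)` from an internal `LOGICAL_ERROR` (`numbers_storage.step != UInt64{0}`) into a user-facing `BAD_ARGUMENTS`. A Python model of the argument validation (the generator is a deliberate simplification of the real storage):

```python
# Model of the step validation added in TableFunctionNumbers.cpp above.
def numbers(offset: int, length: int, step: int = 1):
    # A zero step would previously trip the internal
    # `numbers_storage.step != UInt64{0}` assertion deep in the storage.
    if step == 0:
        raise ValueError("numbers() requires step to be a positive number")
    # With step > 0 this yields exactly `length` values starting at `offset`.
    return range(offset, offset + length * step, step)

assert list(numbers(1, 3, 2)) == [1, 3, 5]
try:
    numbers(1, 10, 0)
except ValueError as err:
    print(f"rejected up front: {err}")
```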
From b318091528eed9db6d04d25bae115d24d3b82eb8 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 8 Apr 2024 17:17:04 +0000
Subject: [PATCH 50/53] Don't check overflow in dotProduct in undefined sanitizer

---
 src/Functions/array/arrayDotProduct.cpp                      | 4 ++--
 .../queries/0_stateless/03037_dot_product_overflow.reference | 1 +
 tests/queries/0_stateless/03037_dot_product_overflow.sql     | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/03037_dot_product_overflow.reference
 create mode 100644 tests/queries/0_stateless/03037_dot_product_overflow.sql

diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp
index 783843a89d5..4551140acc3 100644
--- a/src/Functions/array/arrayDotProduct.cpp
+++ b/src/Functions/array/arrayDotProduct.cpp
@@ -66,13 +66,13 @@ struct DotProduct
     };
 
     template <typename Type>
-    static void accumulate(State & state, Type x, Type y)
+    static NO_SANITIZE_UNDEFINED void accumulate(State & state, Type x, Type y)
     {
         state.sum += x * y;
     }
 
     template <typename Type>
-    static void combine(State & state, const State & other_state)
+    static NO_SANITIZE_UNDEFINED void combine(State & state, const State & other_state)
    {
         state.sum += other_state.sum;
     }
diff --git a/tests/queries/0_stateless/03037_dot_product_overflow.reference b/tests/queries/0_stateless/03037_dot_product_overflow.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/tests/queries/0_stateless/03037_dot_product_overflow.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/03037_dot_product_overflow.sql b/tests/queries/0_stateless/03037_dot_product_overflow.sql
new file mode 100644
index 00000000000..94d5eba6255
--- /dev/null
+++ b/tests/queries/0_stateless/03037_dot_product_overflow.sql
@@ -0,0 +1,2 @@
+select ignore(dotProduct(materialize([9223372036854775807, 1]), materialize([-3, 1])));
+
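`NO_SANITIZE_UNDEFINED` tells UBSan builds that signed overflow in the dot-product accumulator is intended two's-complement wrap-around, in line with ClickHouse's wrapping integer arithmetic, so the test query above no longer aborts sanitizer runs. The wrap semantics can be modeled in Python, whose integers are unbounded and therefore need explicit masking (the expected value below is derived by hand, not taken from the reference file, since `ignore()` makes the test output `0`):

```python
# Model of Int64 wrap-around accumulation in DotProduct (illustrative only).
MASK64 = (1 << 64) - 1

def wrap_i64(v: int) -> int:
    # Reduce to 64 bits, then reinterpret as a signed two's-complement value.
    v &= MASK64
    return v - (1 << 64) if v >= (1 << 63) else v

def dot_product_i64(xs, ys):
    state = 0
    for x, y in zip(xs, ys):
        state = wrap_i64(state + wrap_i64(x * y))  # accumulate with wrapping
    return state

# (2^63 - 1) * -3 overflows Int64; with the annotation it wraps instead of
# trapping under UBSan:
assert dot_product_i64([9223372036854775807, 1], [-3, 1]) == -9223372036854775804
```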
From 2280fdeec1d41fdeb7a09459577312de8dc70bec Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 8 Apr 2024 19:16:47 +0000
Subject: [PATCH 51/53] Empty commit

From b138b1e103d6ccba62620b849931a9a607e9a42b Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 8 Apr 2024 19:18:44 +0000
Subject: [PATCH 52/53] Empty commit

From 4e344f6a9397d27a15af82023b182469eaeebd35 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Tue, 9 Apr 2024 10:25:41 +0000
Subject: [PATCH 53/53] remove ci status and reports for MQ case

---
 tests/ci/commit_status_helper.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py
index bda2db13991..56728c3d3ba 100644
--- a/tests/ci/commit_status_helper.py
+++ b/tests/ci/commit_status_helper.py
@@ -148,6 +148,11 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None:
     """It adds or updates the comment status to all Pull Requests but for release
     one, so the method does nothing for simple pushes and pull requests with
     `release`/`release-lts` labels"""
+
+    if pr_info.is_merge_queue():
+        # skip report creation for the MQ
+        return
+
     # to reduce number of parameters, the Github is constructed on the fly
     gh = Github()
     gh.__requester = commit._requester  # type:ignore #pylint:disable=protected-access
@@ -441,7 +446,9 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) ->
         or pr_info.release_pr
         or pr_info.number == 0
     )
-    if not_run:
+
+    # FIXME: For now, always set mergeable check in the Merge Queue. It's required to pass MQ
+    if not_run and not pr_info.is_merge_queue():
         # Let's avoid unnecessary work
         return
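The same merge-queue theme closes the series: status-comment creation becomes a no-op for MQ runs, and the mergeable check is forced on so the queue can pass. The shape of the early-return guard, with a stand-in stub instead of the real `PRInfo` so the snippet runs on its own:

```python
# Sketch of the MQ guard added in commit_status_helper.py (stub types, not
# the real tests/ci classes).
class StubPRInfo:
    """Stand-in for tests/ci's PRInfo -- only what this sketch needs."""

    def __init__(self, event_name: str):
        self.event_name = event_name

    def is_merge_queue(self) -> bool:
        return self.event_name == "merge_group"

def set_status_comment(pr_info: StubPRInfo) -> None:
    if pr_info.is_merge_queue():
        # skip report creation for the MQ
        return
    print("posting CI status comment...")

set_status_comment(StubPRInfo("merge_group"))   # silently skipped
set_status_comment(StubPRInfo("pull_request"))  # posts the comment
```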