Merge remote-tracking branch 'rschu1ze/master' into locate-mysql

This commit is contained in:
Robert Schulze 2024-03-11 12:23:15 +00:00
commit e43ac49d4e
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
241 changed files with 5490 additions and 2929 deletions

View File

@ -319,7 +319,8 @@ if (COMPILER_CLANG)
endif()
endif ()
set (COMPILER_FLAGS "${COMPILER_FLAGS}")
# Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off")
# Our built-in unwinder only supports DWARF version up to 4.
set (DEBUG_INFO_FLAGS "-g")

View File

@ -45,6 +45,8 @@ namespace Net
~HTTPChunkedStreamBuf();
void close();
bool isComplete() const { return _chunk == std::char_traits<char>::eof(); }
protected:
int readFromDevice(char * buffer, std::streamsize length);
int writeToDevice(const char * buffer, std::streamsize length);
@ -68,6 +70,8 @@ namespace Net
~HTTPChunkedIOS();
HTTPChunkedStreamBuf * rdbuf();
bool isComplete() const { return _buf.isComplete(); }
protected:
HTTPChunkedStreamBuf _buf;
};

View File

@ -210,7 +210,7 @@ namespace Net
void setKeepAliveTimeout(const Poco::Timespan & timeout);
/// Sets the connection timeout for HTTP connections.
const Poco::Timespan & getKeepAliveTimeout() const;
Poco::Timespan getKeepAliveTimeout() const;
/// Returns the connection timeout for HTTP connections.
virtual std::ostream & sendRequest(HTTPRequest & request);
@ -275,7 +275,7 @@ namespace Net
/// This method should only be called if the request contains
/// a "Expect: 100-continue" header.
void flushRequest();
virtual void flushRequest();
/// Flushes the request stream.
///
/// Normally this method does not need to be called.
@ -283,7 +283,7 @@ namespace Net
/// fully sent if receiveResponse() is not called, e.g.,
/// because the underlying socket will be detached.
void reset();
virtual void reset();
/// Resets the session and closes the socket.
///
/// The next request will initiate a new connection,
@ -303,6 +303,9 @@ namespace Net
/// Returns true if the proxy should be bypassed
/// for the current host.
const Poco::Timestamp & getLastRequest() const;
/// Returns the time when the connection was last used.
protected:
enum
{
@ -338,6 +341,10 @@ namespace Net
/// Calls proxyConnect() and attaches the resulting StreamSocket
/// to the HTTPClientSession.
void setLastRequest(Poco::Timestamp time);
void assign(HTTPClientSession & session);
HTTPSessionFactory _proxySessionFactory;
/// Factory to create HTTPClientSession to proxy.
private:
@ -433,11 +440,20 @@ namespace Net
}
inline const Poco::Timespan & HTTPClientSession::getKeepAliveTimeout() const
inline Poco::Timespan HTTPClientSession::getKeepAliveTimeout() const
{
return _keepAliveTimeout;
}
// Returns a const reference to the stored _lastRequest timestamp.
inline const Poco::Timestamp & HTTPClientSession::getLastRequest() const
{
return _lastRequest;
}
// Overwrites the stored _lastRequest timestamp with `time`.
inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
{
_lastRequest = time;
}
}
} // namespace Poco::Net

View File

@ -48,6 +48,8 @@ namespace Net
HTTPFixedLengthStreamBuf(HTTPSession & session, ContentLength length, openmode mode);
~HTTPFixedLengthStreamBuf();
bool isComplete() const;
protected:
int readFromDevice(char * buffer, std::streamsize length);
int writeToDevice(const char * buffer, std::streamsize length);
@ -67,6 +69,8 @@ namespace Net
~HTTPFixedLengthIOS();
HTTPFixedLengthStreamBuf * rdbuf();
bool isComplete() const { return _buf.isComplete(); }
protected:
HTTPFixedLengthStreamBuf _buf;
};

View File

@ -64,6 +64,15 @@ namespace Net
Poco::Timespan getTimeout() const;
/// Returns the timeout for the HTTP session.
Poco::Timespan getConnectionTimeout() const;
/// Returns connection timeout for the HTTP session.
Poco::Timespan getSendTimeout() const;
/// Returns send timeout for the HTTP session.
Poco::Timespan getReceiveTimeout() const;
/// Returns receive timeout for the HTTP session.
bool connected() const;
/// Returns true if the underlying socket is connected.
@ -217,12 +226,25 @@ namespace Net
return _keepAlive;
}
// Returns the session timeout. Note that this reports _receiveTimeout,
// i.e. the same value as getReceiveTimeout().
inline Poco::Timespan HTTPSession::getTimeout() const
{
return _receiveTimeout;
}
// Returns a copy of the stored _connectionTimeout.
inline Poco::Timespan HTTPSession::getConnectionTimeout() const
{
return _connectionTimeout;
}
// Returns a copy of the stored _sendTimeout.
inline Poco::Timespan HTTPSession::getSendTimeout() const
{
return _sendTimeout;
}
// Returns a copy of the stored _receiveTimeout.
inline Poco::Timespan HTTPSession::getReceiveTimeout() const
{
return _receiveTimeout;
}
inline StreamSocket & HTTPSession::socket()
{

View File

@ -63,6 +63,8 @@ namespace Net
~HTTPIOS();
HTTPStreamBuf * rdbuf();
bool isComplete() const { return false; }
protected:
HTTPStreamBuf _buf;
};

View File

@ -49,10 +49,12 @@ HTTPChunkedStreamBuf::~HTTPChunkedStreamBuf()
void HTTPChunkedStreamBuf::close()
{
if (_mode & std::ios::out)
if (_mode & std::ios::out && _chunk != std::char_traits<char>::eof())
{
sync();
_session.write("0\r\n\r\n", 5);
_chunk = std::char_traits<char>::eof();
}
}

View File

@ -227,7 +227,7 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
{
_pRequestStream = 0;
_pResponseStream = 0;
_pResponseStream = 0;
clearException();
_responseReceived = false;
@ -501,5 +501,26 @@ bool HTTPClientSession::bypassProxy() const
else return false;
}
// Takes over the connection held by `session`: its socket is detached and
// attached to this session, and the session state read below is copied across.
// Throws Poco::LogicException if either session reports buffered data.
// `session` is reset at the end and must not be this object.
void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
{
// Assigning a session to itself is not allowed.
poco_assert (this != &session);
// Refuse to proceed while either side still reports buffered data.
if (session.buffered())
throw Poco::LogicException("assign a session with not empty buffered data");
if (buffered())
throw Poco::LogicException("assign to a session with not empty buffered data");
// Move the socket out of the source session and into this one.
attachSocket(session.detachSocket());
// Copy the source session's last-request time, resolved host, keep-alive
// flag, timeouts and proxy configuration.
setLastRequest(session.getLastRequest());
setResolvedHost(session.getResolvedHost());
setKeepAlive(session.getKeepAlive());
setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
setKeepAliveTimeout(session.getKeepAliveTimeout());
setProxyConfig(session.getProxyConfig());
// Finally reset the source session, after its state has been read.
session.reset();
}
} } // namespace Poco::Net

View File

@ -43,6 +43,12 @@ HTTPFixedLengthStreamBuf::~HTTPFixedLengthStreamBuf()
}
// Returns true when _count equals _length.
// NOTE(review): presumably _count is the number of bytes transferred so far
// and _length the declared content length — confirm against the class.
bool HTTPFixedLengthStreamBuf::isComplete() const
{
return _count == _length;
}
int HTTPFixedLengthStreamBuf::readFromDevice(char* buffer, std::streamsize length)
{
int n = 0;

View File

@ -343,10 +343,9 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
{ rg -ao "Found error:.*" fuzzer.log \
|| rg -ao "Exception:.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
echo "Achtung!" > description.txt
echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt
{ rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt
fi
if test -f core.*; then

1
docker/test/stateless/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/minio_data

View File

@ -60,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \
COPY run.sh /
COPY process_style_check_result.py /
CMD ["/bin/bash", "/run.sh"]

View File

@ -3954,6 +3954,7 @@ Possible values:
- `none` — Is similar to throw, but distributed DDL query returns no result set.
- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts.
- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts.
- `none_only_active` — similar to `none`, but doesn't wait for inactive replicas of the `Replicated` database. Note: in this mode it is impossible to tell that the query was not executed on some replica and will instead be executed there in the background.
- `null_status_on_timeout_only_active` — similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database
- `throw_only_active` — similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database

View File

@ -201,12 +201,12 @@ Arguments:
- `-S`, `--structure` — table structure for input data.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-F`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` — file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
- `--multiquery`, `-n` — if specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `--format`, `--output-format` — output format, `TSV` by default.
- `-f`, `--format`, `--output-format` — output format, `TSV` by default.
- `-d`, `--database` — default database, `_local` by default.
- `--stacktrace` — whether to dump debug output in case of exception.
- `--echo` — print query before execution.

View File

@ -19,7 +19,7 @@ empty([x])
An array is considered empty if it does not contain any elements.
:::note
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`.
Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`.
:::
The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty).
@ -104,17 +104,416 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
Alias: `OCTET_LENGTH`
## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64
## emptyArrayUInt8
## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64
Returns an empty UInt8 array.
## emptyArrayFloat32, emptyArrayFloat64
**Syntax**
## emptyArrayDate, emptyArrayDateTime
```sql
emptyArrayUInt8()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt8();
```
Result:
```response
[]
```
## emptyArrayUInt16
Returns an empty UInt16 array.
**Syntax**
```sql
emptyArrayUInt16()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt16();
```
Result:
```response
[]
```
## emptyArrayUInt32
Returns an empty UInt32 array.
**Syntax**
```sql
emptyArrayUInt32()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt32();
```
Result:
```response
[]
```
## emptyArrayUInt64
Returns an empty UInt64 array.
**Syntax**
```sql
emptyArrayUInt64()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayUInt64();
```
Result:
```response
[]
```
## emptyArrayInt8
Returns an empty Int8 array.
**Syntax**
```sql
emptyArrayInt8()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt8();
```
Result:
```response
[]
```
## emptyArrayInt16
Returns an empty Int16 array.
**Syntax**
```sql
emptyArrayInt16()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt16();
```
Result:
```response
[]
```
## emptyArrayInt32
Returns an empty Int32 array.
**Syntax**
```sql
emptyArrayInt32()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt32();
```
Result:
```response
[]
```
## emptyArrayInt64
Returns an empty Int64 array.
**Syntax**
```sql
emptyArrayInt64()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayInt64();
```
Result:
```response
[]
```
## emptyArrayFloat32
Returns an empty Float32 array.
**Syntax**
```sql
emptyArrayFloat32()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayFloat32();
```
Result:
```response
[]
```
## emptyArrayFloat64
Returns an empty Float64 array.
**Syntax**
```sql
emptyArrayFloat64()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayFloat64();
```
Result:
```response
[]
```
## emptyArrayDate
Returns an empty Date array.
**Syntax**
```sql
emptyArrayDate()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayDate();
```
Result:
```response
[]
```
## emptyArrayDateTime
Returns an empty DateTime array.
**Syntax**
```sql
emptyArrayDateTime()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayDateTime();
```
Result:
```response
[]
```
## emptyArrayString
Accepts zero arguments and returns an empty array of the appropriate type.
Returns an empty String array.
**Syntax**
```sql
emptyArrayString()
```
**Arguments**
None.
**Returned value**
An empty array.
**Examples**
Query:
```sql
SELECT emptyArrayString();
```
Result:
```response
[]
```
## emptyArrayToSingle

View File

@ -5,7 +5,12 @@ sidebar_label: Window Functions
title: Window Functions
---
ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
Window functions let you perform calculations across a set of rows that are related to the current row.
Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned.
## Standard Window Functions
ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.
| Feature | Support or workaround |
|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@ -25,6 +30,8 @@ ClickHouse supports the standard grammar for defining windows and window functio
## ClickHouse-specific Window Functions
There is also the following window function that is specific to ClickHouse:
### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS])
Finds non-negative derivative for given `metric_column` by `timestamp_column`.
@ -33,40 +40,6 @@ The computed value is the following for each row:
- `0` for 1st row,
- ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for the $i$-th row.
## References
### GitHub Issues
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
### Tests
These tests contain the examples of the currently supported grammar:
https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml
https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
### Postgres Docs
https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW
https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
https://www.postgresql.org/docs/devel/functions-window.html
https://www.postgresql.org/docs/devel/tutorial-window.html
### MySQL Docs
https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html
## Syntax
```text
@ -80,20 +53,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
- `PARTITION BY` - defines how to break a resultset into groups.
- `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function.
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame.
- `WINDOW` - allows to reuse a window definition with multiple expressions.
### Functions
These functions can be used only as a window function.
- `row_number()` - Number the current row within its partition starting from 1.
- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
- `WINDOW` - allows multiple expressions to use the same window definition.
```text
PARTITION
@ -112,8 +72,23 @@ These functions can be used only as a window function.
└─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION)
```
### Functions
These functions can be used only as a window function.
- `row_number()` - Number the current row within its partition starting from 1.
- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
## Examples
Let's have a look at some examples of how window functions can be used.
```sql
CREATE TABLE wf_partition
(
@ -589,6 +564,41 @@ ORDER BY
└──────────────┴─────────────────────┴───────┴─────────────────────────┘
```
## References
### GitHub Issues
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
### Tests
These tests contain the examples of the currently supported grammar:
https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml
https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
### Postgres Docs
https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW
https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
https://www.postgresql.org/docs/devel/functions-window.html
https://www.postgresql.org/docs/devel/tutorial-window.html
### MySQL Docs
https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html
## Related Content
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -649,11 +649,22 @@ log_query_threads=1
## max_query_size {#settings-max_query_size}
查询的最大部分可以被带到RAM用于使用SQL解析器进行解析。
插入查询还包含由单独的流解析器消耗O(1)RAM处理的插入数据这些数据不包含在此限制中。
SQL 解析器解析的查询字符串的最大字节数。INSERT 查询的 VALUES 子句中的数据由单独的流解析器（消耗 O(1) RAM）处理，并且不受此限制的影响。
默认值：256KiB。
## max_parser_depth {#max_parser_depth}
限制递归下降解析器中的最大递归深度。允许控制堆栈大小。
可能的值:
- 正整数。
- 0 — 递归深度不受限制。
默认值：1000。
## interactive_delay {#interactive-delay}
以微秒为单位的间隔,用于检查请求执行是否已被取消并发送进度。
@ -1064,6 +1075,28 @@ ClickHouse生成异常
默认值0。
## optimize_functions_to_subcolumns {#optimize_functions_to_subcolumns}
启用或禁用通过将某些函数转换为读取子列的优化。这减少了要读取的数据量。
这些函数可以转化为:
- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) 读取 [size0](../../sql-reference/data-types/array.md/#array-size) 子列。
- [empty](../../sql-reference/functions/array-functions.md/#empty函数) 读取 [size0](../../sql-reference/data-types/array.md/#array-size) 子列。
- [notEmpty](../../sql-reference/functions/array-functions.md/#notempty函数) 读取 [size0](../../sql-reference/data-types/array.md/#array-size) 子列。
- [isNull](../../sql-reference/operators/index.md#operator-is-null) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。
- [isNotNull](../../sql-reference/operators/index.md#is-not-null) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。
- [count](../../sql-reference/aggregate-functions/reference/count.md) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。
- [mapKeys](../../sql-reference/functions/tuple-map-functions.mdx/#mapkeys) 读取 [keys](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。
- [mapValues](../../sql-reference/functions/tuple-map-functions.mdx/#mapvalues) 读取 [values](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。
可能的值:
- 0 — 禁用优化。
- 1 — 优化已启用。
默认值:`0`。
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
- 类型:秒

View File

@ -1,7 +1,7 @@
---
slug: /zh/sql-reference/data-types/array
---
# 阵列(T) {#data-type-array}
# 数组(T) {#data-type-array}
`T` 类型元素组成的数组。
@ -66,3 +66,27 @@ SELECT array(1, 'a')
Received exception from server (version 1.1.54388):
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
```
## 数组大小 {#array-size}
可以使用 `size0` 子列找到数组的大小,而无需读取整个列。对于多维数组,您可以使用 `sizeN-1`,其中 `N` 是所需的维度。
**例子**
SQL查询
```sql
CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]);
SELECT arr.size0, arr.size1, arr.size2 FROM t_arr;
```
结果:
``` text
┌─arr.size0─┬─arr.size1─┬─arr.size2─┐
│ 1 │ [2] │ [[4,1]] │
└───────────┴───────────┴───────────┘
```

View File

@ -20,6 +20,33 @@ slug: /zh/sql-reference/data-types/nullable
掩码文件中的条目允许ClickHouse区分每个表行的对应数据类型的«NULL»和默认值。由于有额外的文件，«Nullable»列比普通列消耗更多的存储空间。
## null子列 {#finding-null}
通过使用 `null` 子列可以在列中查找 `NULL` 值，而无需读取整个列。如果对应的值为 `NULL`，则返回 `1`，否则返回 `0`。
**示例**
SQL查询:
``` sql
CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO nullable VALUES (1) (NULL) (2) (NULL);
SELECT n.null FROM nullable;
```
结果:
``` text
┌─n.null─┐
│ 0 │
│ 1 │
│ 0 │
│ 1 │
└────────┘
```
## 用法示例 {#yong-fa-shi-li}
``` sql

View File

@ -1,6 +1,5 @@
#include "LibraryBridgeHandlers.h"
#include "CatBoostLibraryHandler.h"
#include "CatBoostLibraryHandlerFactory.h"
#include "Common/ProfileEvents.h"
#include "ExternalDictionaryLibraryHandler.h"
@ -11,10 +10,8 @@
#include <IO/ReadHelpers.h>
#include <Common/BridgeProtocolVersion.h>
#include <IO/WriteHelpers.h>
#include <Poco/Net/HTMLForm.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/ThreadPool.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Formats/IInputFormat.h>

View File

@ -8,13 +8,11 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Server/HTTP/HTMLForm.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/NumberParser.h>
#include <Common/logger_useful.h>
#include <base/scope_guard.h>
#include <Common/BridgeProtocolVersion.h>
#include <Common/quoteString.h>
#include "getIdentifierQuote.h"

View File

@ -7,13 +7,10 @@
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Common/BridgeProtocolVersion.h>
#include <Common/logger_useful.h>
#include <base/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCPooledConnectionFactory.h"

View File

@ -3,12 +3,8 @@
#include <IO/ReadBufferFromString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/logger_useful.h>
@ -54,21 +50,7 @@ Chunk ODBCSource::generate()
const auto & sample = description.sample_block.getByPosition(idx);
if (!result.is_null(idx))
{
bool is_nullable = description.types[idx].second;
if (is_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(column_nullable.getNestedColumn(), data_type.getNestedType(), description.types[idx].first, result, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
{
insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx);
}
}
insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx);
else
insertDefaultValue(*columns[idx], *sample.column);
}
@ -87,59 +69,60 @@ void ODBCSource::insertValue(
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(row.get<uint16_t>(idx));
column.insert(row.get<uint16_t>(idx));
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(row.get<uint16_t>(idx));
column.insert(row.get<uint16_t>(idx));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(row.get<uint32_t>(idx));
column.insert(row.get<uint32_t>(idx));
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(row.get<uint64_t>(idx));
column.insert(row.get<uint64_t>(idx));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(row.get<int16_t>(idx));
column.insert(row.get<int16_t>(idx));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(row.get<int16_t>(idx));
column.insert(row.get<int16_t>(idx));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(row.get<int32_t>(idx));
column.insert(row.get<int32_t>(idx));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(row.get<int64_t>(idx));
column.insert(row.get<int64_t>(idx));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(row.get<float>(idx));
column.insert(row.get<float>(idx));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(row.get<double>(idx));
column.insert(row.get<double>(idx));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtFixedString:
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(row.get<std::string>(idx));
column.insert(row.get<std::string>(idx));
break;
case ValueType::vtUUID:
{
auto value = row.get<std::string>(idx);
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
column.insert(parse<UUID>(value.data(), value.size()));
break;
}
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{row.get<std::string>(idx)}.getDayNum()});
column.insert(UInt16{LocalDate{row.get<std::string>(idx)}.getDayNum()});
break;
case ValueType::vtDateTime:
{
auto value = row.get<std::string>(idx);
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(data_type.get())->getTimeZone());
const DataTypeDateTime & datetime_type = assert_cast<const DataTypeDateTime &>(*data_type);
readDateTimeText(time, in, datetime_type.getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(time));
column.insert(static_cast<UInt32>(time));
break;
}
case ValueType::vtDateTime64:
@ -147,14 +130,14 @@ void ODBCSource::insertValue(
auto value = row.get<std::string>(idx);
ReadBufferFromString in(value);
DateTime64 time = 0;
const auto * datetime_type = assert_cast<const DataTypeDateTime64 *>(data_type.get());
readDateTime64Text(time, datetime_type->getScale(), in, datetime_type->getTimeZone());
assert_cast<DataTypeDateTime64::ColumnType &>(column).insertValue(time);
const DataTypeDateTime64 & datetime_type = assert_cast<const DataTypeDateTime64 &>(*data_type);
readDateTime64Text(time, datetime_type.getScale(), in, datetime_type.getTimeZone());
column.insert(time);
break;
}
case ValueType::vtDecimal32: [[fallthrough]];
case ValueType::vtDecimal64: [[fallthrough]];
case ValueType::vtDecimal128: [[fallthrough]];
case ValueType::vtDecimal32:
case ValueType::vtDecimal64:
case ValueType::vtDecimal128:
case ValueType::vtDecimal256:
{
auto value = row.get<std::string>(idx);

View File

@ -1,6 +1,5 @@
#include "ODBCBlockOutputStream.h"
#include <Common/logger_useful.h>
#include <IO/WriteBufferFromString.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/IOutputFormat.h>

View File

@ -2,6 +2,7 @@
/metadata_dropped
/data
/store
/disks
/access
/flags
/dictionaries_lib

View File

@ -46,6 +46,7 @@
#include <Common/makeSocketAddress.h>
#include <Common/FailPoint.h>
#include <Common/CPUID.h>
#include <Common/HTTPConnectionPool.h>
#include <Server/waitServersToFinish.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Core/ServerUUID.h>
@ -1547,6 +1548,23 @@ try
FileCacheFactory::instance().updateSettingsFromConfig(*config);
HTTPConnectionPools::instance().setLimits(
HTTPConnectionPools::Limits{
new_server_settings.disk_connections_soft_limit,
new_server_settings.disk_connections_warn_limit,
new_server_settings.disk_connections_store_limit,
},
HTTPConnectionPools::Limits{
new_server_settings.storage_connections_soft_limit,
new_server_settings.storage_connections_warn_limit,
new_server_settings.storage_connections_store_limit,
},
HTTPConnectionPools::Limits{
new_server_settings.http_connections_soft_limit,
new_server_settings.http_connections_warn_limit,
new_server_settings.http_connections_store_limit,
});
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
/// Must be the last.

View File

@ -65,7 +65,7 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes
/// test mode for integration tests.
if (test_mode)
dst_buf = std::make_shared<WriteBufferFromHTTP>(Poco::URI(dst_file_path), Poco::Net::HTTPRequest::HTTP_PUT);
dst_buf = std::make_shared<WriteBufferFromHTTP>(HTTPConnectionGroupType::HTTP, Poco::URI(dst_file_path), Poco::Net::HTTPRequest::HTTP_PUT);
else
dst_buf = std::make_shared<WriteBufferFromFile>(dst_file_path);
@ -88,7 +88,7 @@ void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_
{
dst_path /= "store";
auto files_root = dst_path / prefix;
root_meta = std::make_shared<WriteBufferFromHTTP>(Poco::URI(files_root / ".index"), Poco::Net::HTTPRequest::HTTP_PUT);
root_meta = std::make_shared<WriteBufferFromHTTP>(HTTPConnectionGroupType::HTTP, Poco::URI(files_root / ".index"), Poco::Net::HTTPRequest::HTTP_PUT);
}
else
{
@ -112,7 +112,7 @@ void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_
if (test_mode)
{
auto files_root = dst_path / prefix;
directory_meta = std::make_shared<WriteBufferFromHTTP>(Poco::URI(dst_path / directory_prefix / ".index"), Poco::Net::HTTPRequest::HTTP_PUT);
directory_meta = std::make_shared<WriteBufferFromHTTP>(HTTPConnectionGroupType::HTTP, Poco::URI(dst_path / directory_prefix / ".index"), Poco::Net::HTTPRequest::HTTP_PUT);
}
else
{

View File

@ -154,7 +154,8 @@ enum class AccessType
M(SET_DEFINER, "", USER_NAME, ALL) \
\
M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \
M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_CONNECTIONS_CACHE, "SYSTEM DROP CONNECTIONS CACHE, DROP CONNECTIONS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_MARK_CACHE, "SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_MMAP_CACHE, "SYSTEM DROP MMAP, DROP MMAP CACHE, DROP MMAP", GLOBAL, SYSTEM_DROP_CACHE) \

View File

@ -49,71 +49,135 @@ String QuotaTypeInfo::valueToStringWithName(QuotaValue value) const
const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
{
static constexpr auto make_info = [](const char * raw_name_, UInt64 output_denominator_)
static constexpr auto make_info = [](const char * raw_name_, String current_usage_description_, String max_allowed_usage_description_, UInt64 output_denominator_)
{
String init_name = raw_name_;
boost::to_lower(init_name);
String init_keyword = raw_name_;
boost::replace_all(init_keyword, "_", " ");
bool init_output_as_float = (output_denominator_ != 1);
return QuotaTypeInfo{raw_name_, std::move(init_name), std::move(init_keyword), init_output_as_float, output_denominator_};
return QuotaTypeInfo
{
.raw_name = raw_name_,
.name = std::move(init_name),
.keyword = std::move(init_keyword),
.current_usage_description = std::move(current_usage_description_),
.max_allowed_usage_description = std::move(max_allowed_usage_description_),
.output_as_float = init_output_as_float,
.output_denominator = output_denominator_
};
};
switch (type)
{
case QuotaType::QUERIES:
{
static const auto info = make_info("QUERIES", 1);
static const auto info = make_info(
"QUERIES",
"The current number of executed queries.",
"The maximum allowed number of queries of all types allowed to be executed.",
1
);
return info;
}
case QuotaType::QUERY_SELECTS:
{
static const auto info = make_info("QUERY_SELECTS", 1);
static const auto info = make_info(
"QUERY_SELECTS",
"The current number of executed SELECT queries.",
"The maximum allowed number of SELECT queries allowed to be executed.",
1
);
return info;
}
case QuotaType::QUERY_INSERTS:
{
static const auto info = make_info("QUERY_INSERTS", 1);
static const auto info = make_info(
"QUERY_INSERTS",
"The current number of executed INSERT queries.",
"The maximum allowed number of INSERT queries allowed to be executed.",
1
);
return info;
}
case QuotaType::ERRORS:
{
static const auto info = make_info("ERRORS", 1);
static const auto info = make_info(
"ERRORS",
"The current number of queries resulted in an error.",
"The maximum number of queries resulted in an error allowed within the specified period of time.",
1
);
return info;
}
case QuotaType::RESULT_ROWS:
{
static const auto info = make_info("RESULT_ROWS", 1);
static const auto info = make_info(
"RESULT_ROWS",
"The current total number of rows in the result set of all queries within the current period of time.",
"The maximum total number of rows in the result set of all queries allowed within the specified period of time.",
1
);
return info;
}
case QuotaType::RESULT_BYTES:
{
static const auto info = make_info("RESULT_BYTES", 1);
static const auto info = make_info(
"RESULT_BYTES",
"The current total number of bytes in the result set of all queries within the current period of time.",
"The maximum total number of bytes in the result set of all queries allowed within the specified period of time.",
1
);
return info;
}
case QuotaType::READ_ROWS:
{
static const auto info = make_info("READ_ROWS", 1);
static const auto info = make_info(
"READ_ROWS",
"The current total number of rows read during execution of all queries within the current period of time.",
"The maximum number of rows to read during execution of all queries allowed within the specified period of time.",
1
);
return info;
}
case QuotaType::READ_BYTES:
{
static const auto info = make_info("READ_BYTES", 1);
static const auto info = make_info(
"READ_BYTES",
"The current total number of bytes read during execution of all queries within the current period of time.",
"The maximum number of bytes to read during execution of all queries allowed within the specified period of time.",
1
);
return info;
}
case QuotaType::EXECUTION_TIME:
{
static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */);
static const auto info = make_info(
"EXECUTION_TIME",
"The current total amount of time (in nanoseconds) spent to execute queries within the current period of time",
"The maximum amount of time (in nanoseconds) allowed for all queries to execute within the specified period of time",
1000000000 /* execution_time is stored in nanoseconds */
);
return info;
}
case QuotaType::WRITTEN_BYTES:
{
static const auto info = make_info("WRITTEN_BYTES", 1);
static const auto info = make_info(
"WRITTEN_BYTES",
"The current total number of bytes written during execution of all queries within the current period of time.",
"The maximum number of bytes to written during execution of all queries allowed within the specified period of time.",
1
);
return info;
}
case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS:
{
static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1);
/// The raw name must be spelled exactly like the enumerator: make_info() derives
/// `keyword` from it verbatim (only '_' is replaced with ' ').
static const auto info = make_info(
    "FAILED_SEQUENTIAL_AUTHENTICATIONS",
    "The current number of consecutive authentication failures within the current period of time.",
    "The maximum number of consecutive authentication failures allowed within the specified period of time.",
    1
);
return info;
}
case QuotaType::MAX: break;

View File

@ -33,6 +33,8 @@ struct QuotaTypeInfo
const char * const raw_name = "";
const String name; /// Lowercased with underscores, e.g. "result_rows".
const String keyword; /// Uppercased with spaces, e.g. "RESULT ROWS".
const String current_usage_description;
const String max_allowed_usage_description;
const bool output_as_float = false;
const UInt64 output_denominator = 1;
String valueToString(QuotaValue value) const;

View File

@ -33,7 +33,7 @@ String toString(RowPolicyFilterType type)
const RowPolicyFilterTypeInfo & RowPolicyFilterTypeInfo::get(RowPolicyFilterType type_)
{
static constexpr auto make_info = [](const char * raw_name_)
static constexpr auto make_info = [](const char * raw_name_, const String & comment_)
{
String init_name = raw_name_;
boost::to_lower(init_name);
@ -41,14 +41,17 @@ const RowPolicyFilterTypeInfo & RowPolicyFilterTypeInfo::get(RowPolicyFilterType
String init_command = init_name.substr(0, underscore_pos);
boost::to_upper(init_command);
bool init_is_check = (std::string_view{init_name}.substr(underscore_pos + 1) == "check");
return RowPolicyFilterTypeInfo{raw_name_, std::move(init_name), std::move(init_command), init_is_check};
return RowPolicyFilterTypeInfo{raw_name_, std::move(init_name), std::move(init_command), comment_, init_is_check};
};
switch (type_)
{
case RowPolicyFilterType::SELECT_FILTER:
{
static const auto info = make_info("SELECT_FILTER");
static const auto info = make_info(
"SELECT_FILTER",
"Expression which is used for filtering in SELECT queries."
);
return info;
}
#if 0 /// Row-level security for INSERT, UPDATE, DELETE is not implemented yet.

View File

@ -52,6 +52,7 @@ struct RowPolicyFilterTypeInfo
const char * const raw_name;
const String name; /// Lowercased with underscores, e.g. "select_filter".
const String command; /// Uppercased without last word, e.g. "SELECT".
const String description;
const bool is_check; /// E.g. false for SELECT_FILTER.
static const RowPolicyFilterTypeInfo & get(RowPolicyFilterType type);
};

View File

@ -36,7 +36,7 @@ public:
Result authenticateRequest(Poco::Net::HTTPRequest & request) const
{
auto session = makeHTTPSession(uri, timeouts);
auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, uri, timeouts);
Poco::Net::HTTPResponse response;
auto milliseconds_to_wait = retry_initial_backoff_ms;

View File

@ -1,5 +1,7 @@
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitorToString.h>
#include <Common/SipHash.h>
@ -38,6 +40,70 @@ ConstantNode::ConstantNode(Field value_)
: ConstantNode(value_, applyVisitor(FieldToDataType(), value_))
{}
bool ConstantNode::requiresCastCall() const
{
const auto & constant_value_literal = constant_value->getValue();
bool need_to_add_cast_function = false;
auto constant_value_literal_type = constant_value_literal.getType();
WhichDataType constant_value_type(constant_value->getType());
switch (constant_value_literal_type)
{
case Field::Types::String:
{
need_to_add_cast_function = !constant_value_type.isString();
break;
}
case Field::Types::UInt64:
case Field::Types::Int64:
case Field::Types::Float64:
{
WhichDataType constant_value_field_type(applyVisitor(FieldToDataType(), constant_value_literal));
need_to_add_cast_function = constant_value_field_type.idx != constant_value_type.idx;
break;
}
case Field::Types::Int128:
case Field::Types::UInt128:
case Field::Types::Int256:
case Field::Types::UInt256:
case Field::Types::Decimal32:
case Field::Types::Decimal64:
case Field::Types::Decimal128:
case Field::Types::Decimal256:
case Field::Types::AggregateFunctionState:
case Field::Types::Array:
case Field::Types::Tuple:
case Field::Types::Map:
case Field::Types::UUID:
case Field::Types::Bool:
case Field::Types::Object:
case Field::Types::IPv4:
case Field::Types::IPv6:
case Field::Types::Null:
case Field::Types::CustomType:
{
need_to_add_cast_function = true;
break;
}
}
// Add cast if constant was created as a result of constant folding.
// Constant folding may lead to type transformation and literal on shard
// may have a different type.
return need_to_add_cast_function || source_expression != nullptr;
}
/// Returns true when the constant has a source expression and that expression
/// is a call of the `_CAST` function.
bool ConstantNode::receivedFromInitiatorServer() const
{
    if (!hasSourceExpression())
        return false;

    const auto * source_function = getSourceExpression()->as<FunctionNode>();
    return source_function != nullptr && source_function->getFunctionName() == "_CAST";
}
void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
{
buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this);
@ -89,54 +155,7 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const
if (!options.add_cast_for_constants)
return constant_value_ast;
bool need_to_add_cast_function = false;
auto constant_value_literal_type = constant_value_literal.getType();
WhichDataType constant_value_type(constant_value->getType());
switch (constant_value_literal_type)
{
case Field::Types::String:
{
need_to_add_cast_function = !constant_value_type.isString();
break;
}
case Field::Types::UInt64:
case Field::Types::Int64:
case Field::Types::Float64:
{
WhichDataType constant_value_field_type(applyVisitor(FieldToDataType(), constant_value_literal));
need_to_add_cast_function = constant_value_field_type.idx != constant_value_type.idx;
break;
}
case Field::Types::Int128:
case Field::Types::UInt128:
case Field::Types::Int256:
case Field::Types::UInt256:
case Field::Types::Decimal32:
case Field::Types::Decimal64:
case Field::Types::Decimal128:
case Field::Types::Decimal256:
case Field::Types::AggregateFunctionState:
case Field::Types::Array:
case Field::Types::Tuple:
case Field::Types::Map:
case Field::Types::UUID:
case Field::Types::Bool:
case Field::Types::Object:
case Field::Types::IPv4:
case Field::Types::IPv6:
case Field::Types::Null:
case Field::Types::CustomType:
{
need_to_add_cast_function = true;
break;
}
}
// Add cast if constant was created as a result of constant folding.
// Constant folding may lead to type transformation and literal on shard
// may have a different type.
if (need_to_add_cast_function || source_expression != nullptr)
if (requiresCastCall())
{
auto constant_type_name_ast = std::make_shared<ASTLiteral>(constant_value->getType()->getName());
return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast));

View File

@ -75,6 +75,12 @@ public:
return constant_value->getType();
}
/// Check if conversion to AST requires wrapping with _CAST function.
bool requiresCastCall() const;
/// Check if constant is a result of _CAST function constant folding.
bool receivedFromInitiatorServer() const;
void setMaskId(size_t id)
{
mask_id = id;

View File

@ -58,8 +58,12 @@ bool CatBoostLibraryBridgeHelper::bridgeHandShake()
String result;
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
readString(result, buf);
auto buf = BuilderRWBufferFromHTTP(getPingURI())
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withTimeouts(http_timeouts)
.create(credentials);
readString(result, *buf);
}
catch (...)
{
@ -79,29 +83,29 @@ ExternalModelInfos CatBoostLibraryBridgeHelper::listModels()
{
startBridgeSync();
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_LIST_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[](std::ostream &) {},
http_timeouts, credentials);
auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_LIST_METHOD))
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(http_timeouts)
.create(credentials);
ExternalModelInfos result;
UInt64 num_rows;
readIntBinary(num_rows, buf);
readIntBinary(num_rows, *buf);
for (UInt64 i = 0; i < num_rows; ++i)
{
ExternalModelInfo info;
readStringBinary(info.model_path, buf);
readStringBinary(info.model_type, buf);
readStringBinary(info.model_path, *buf);
readStringBinary(info.model_type, *buf);
UInt64 t;
readIntBinary(t, buf);
readIntBinary(t, *buf);
info.loading_start_time = std::chrono::system_clock::from_time_t(t);
readIntBinary(t, buf);
readIntBinary(t, *buf);
info.loading_duration = std::chrono::milliseconds(t);
result.push_back(info);
@ -116,17 +120,19 @@ void CatBoostLibraryBridgeHelper::removeModel()
assert(model_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_REMOVEMODEL_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path);
},
http_timeouts, credentials);
auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_REMOVEMODEL_METHOD))
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(http_timeouts)
.withOutCallback(
[this](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path);
})
.create(credentials);
String result;
readStringBinary(result, buf);
readStringBinary(result, *buf);
assert(result == "1");
}
@ -134,14 +140,14 @@ void CatBoostLibraryBridgeHelper::removeAllModels()
{
startBridgeSync();
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[](std::ostream &){},
http_timeouts, credentials);
auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD))
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(http_timeouts)
.create(credentials);
String result;
readStringBinary(result, buf);
readStringBinary(result, *buf);
assert(result == "1");
}
@ -151,18 +157,20 @@ size_t CatBoostLibraryBridgeHelper::getTreeCount()
assert(model_path && library_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_GETTREECOUNT_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this](std::ostream & os)
{
os << "library_path=" << escapeForFileName(*library_path) << "&";
os << "model_path=" << escapeForFileName(*model_path);
},
http_timeouts, credentials);
auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_GETTREECOUNT_METHOD))
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(http_timeouts)
.withOutCallback(
[this](std::ostream & os)
{
os << "library_path=" << escapeForFileName(*library_path) << "&";
os << "model_path=" << escapeForFileName(*model_path);
})
.create(credentials);
size_t result;
readIntBinary(result, buf);
readIntBinary(result, *buf);
return result;
}
@ -177,17 +185,19 @@ ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & c
assert(model_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_LIB_EVALUATE_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this, serialized = string_write_buf.str()](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path) << "&";
os << "data=" << escapeForFileName(serialized);
},
http_timeouts, credentials);
auto buf = BuilderRWBufferFromHTTP(createRequestURI(CATBOOST_LIB_EVALUATE_METHOD))
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(http_timeouts)
.withOutCallback(
[this, serialized = string_write_buf.str()](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path) << "&";
os << "data=" << escapeForFileName(serialized);
})
.create(credentials);
NativeReader deserializer(buf, /*server_revision*/ 0);
NativeReader deserializer(*buf, /*server_revision*/ 0);
Block block_read = deserializer.read();
return block_read.getColumns()[0];

View File

@ -71,8 +71,12 @@ bool ExternalDictionaryLibraryBridgeHelper::bridgeHandShake()
String result;
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
readString(result, buf);
auto buf = BuilderRWBufferFromHTTP(getPingURI())
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withTimeouts(http_timeouts)
.create(credentials);
readString(result, *buf);
}
catch (...)
{
@ -247,30 +251,28 @@ QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadKeys(const Block & requ
bool ExternalDictionaryLibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) const
{
ReadWriteBufferFromHTTP buf(
uri,
Poco::Net::HTTPRequest::HTTP_POST,
std::move(out_stream_callback),
http_timeouts, credentials);
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(http_timeouts)
.withOutCallback(std::move(out_stream_callback))
.create(credentials);
bool res;
readBoolText(res, buf);
readBoolText(res, *buf);
return res;
}
QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback)
{
auto read_buf_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
Poco::Net::HTTPRequest::HTTP_POST,
std::move(out_stream_callback),
http_timeouts,
credentials,
0,
DBMS_DEFAULT_BUFFER_SIZE,
getContext()->getReadSettings(),
HTTPHeaderEntries{});
auto read_buf_ptr = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withSettings(getContext()->getReadSettings())
.withTimeouts(http_timeouts)
.withOutCallback(std::move(out_stream_callback))
.create(credentials);
auto source = FormatFactory::instance().getInput(ExternalDictionaryLibraryBridgeHelper::DEFAULT_FORMAT, *read_buf_ptr, sample_block, getContext(), DEFAULT_BLOCK_SIZE);
source->addBuffer(std::move(read_buf_ptr));

View File

@ -97,8 +97,12 @@ protected:
{
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, getHTTPTimeouts(), credentials);
return checkString(PING_OK_ANSWER, buf);
auto buf = BuilderRWBufferFromHTTP(getPingURI())
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withTimeouts(getHTTPTimeouts())
.create(credentials);
return checkString(PING_OK_ANSWER, *buf);
}
catch (...)
{
@ -198,10 +202,14 @@ protected:
uri.addQueryParameter("connection_string", getConnectionString());
uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling));
ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, {}, getHTTPTimeouts(), credentials);
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(getHTTPTimeouts())
.create(credentials);
bool res;
readBoolText(res, buf);
bool res = false;
readBoolText(res, *buf);
is_schema_allowed = res;
}
@ -220,10 +228,14 @@ protected:
uri.addQueryParameter("connection_string", getConnectionString());
uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling));
ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, {}, getHTTPTimeouts(), credentials);
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(getHTTPTimeouts())
.create(credentials);
std::string character;
readStringBinary(character, buf);
readStringBinary(character, *buf);
if (character.length() > 1)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Failed to parse quoting style from '{}' for service {}",
character, BridgeHelperMixin::serviceAlias());

View File

@ -153,6 +153,12 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
current_resolved_address = *it;
break;
}
catch (DB::NetException &)
{
if (++it == addresses.end())
throw;
continue;
}
catch (Poco::Net::NetException &)
{
if (++it == addresses.end())
@ -199,6 +205,17 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.",
server_name, server_version_major, server_version_minor, server_version_patch);
}
catch (DB::NetException & e)
{
disconnect();
/// Remove this possible stale entry from cache
DNSResolver::instance().removeHostFromCache(host);
/// Add server address to exception. Exception will preserve stack trace.
e.addMessage("({})", getDescription());
throw;
}
catch (Poco::Net::NetException & e)
{
disconnect();
@ -206,7 +223,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
/// Remove this possible stale entry from cache
DNSResolver::instance().removeHostFromCache(host);
/// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost.
/// Add server address to exception. Also Exception will remember new stack trace. It's a pity that more precise exception type is lost.
throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription());
}
catch (Poco::TimeoutException & e)
@ -216,7 +233,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
/// Remove this possible stale entry from cache
DNSResolver::instance().removeHostFromCache(host);
/// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost.
/// Add server address to exception. Also Exception will remember new stack trace. It's a pity that more precise exception type is lost.
/// This exception can only be thrown from socket->connect(), so add information about connection timeout.
const auto & connection_timeout = static_cast<bool>(secure) ? timeouts.secure_connection_timeout : timeouts.connection_timeout;
throw NetException(

View File

@ -232,7 +232,7 @@ ASTPtr QueryFuzzer::getRandomColumnLike()
return nullptr;
}
ASTPtr new_ast = column_like[fuzz_rand() % column_like.size()]->clone();
ASTPtr new_ast = column_like[fuzz_rand() % column_like.size()].second->clone();
new_ast->setAlias("");
return new_ast;
@ -272,7 +272,7 @@ void QueryFuzzer::replaceWithTableLike(ASTPtr & ast)
return;
}
ASTPtr new_ast = table_like[fuzz_rand() % table_like.size()]->clone();
ASTPtr new_ast = table_like[fuzz_rand() % table_like.size()].second->clone();
std::string old_alias = ast->tryGetAlias();
new_ast->setAlias(old_alias);
@ -1214,57 +1214,46 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
}
}
#define AST_FUZZER_PART_TYPE_CAP 1000
/*
* This function collects various parts of query that we can then substitute
* to a query being fuzzed.
*
* TODO: we just stop remembering new parts after our corpus reaches certain size.
* This is boring, should implement a random replacement of existing parts with
* small probability. Do this after we add this fuzzer to CI and fix all the
* problems it can routinely find even in this boring version.
*/
void QueryFuzzer::collectFuzzInfoMain(ASTPtr ast)
{
collectFuzzInfoRecurse(ast);
aliases.clear();
for (const auto & alias : aliases_set)
{
aliases.push_back(alias);
}
column_like.clear();
for (const auto & [name, value] : column_like_map)
{
column_like.push_back(value);
}
table_like.clear();
for (const auto & [name, value] : table_like_map)
{
table_like.push_back(value);
}
}
void QueryFuzzer::addTableLike(ASTPtr ast)
{
if (table_like_map.size() > 1000)
if (table_like_map.size() > AST_FUZZER_PART_TYPE_CAP)
{
table_like_map.clear();
const auto iter = std::next(table_like.begin(), fuzz_rand() % table_like.size());
const auto ast_del = *iter;
table_like.erase(iter);
table_like_map.erase(ast_del.first);
}
const auto name = ast->formatForErrorMessage();
if (name.size() < 200)
{
table_like_map.insert({name, ast});
const auto res = table_like_map.insert({name, ast});
if (res.second)
{
table_like.push_back({name, ast});
}
}
}
void QueryFuzzer::addColumnLike(ASTPtr ast)
{
if (column_like_map.size() > 1000)
if (column_like_map.size() > AST_FUZZER_PART_TYPE_CAP)
{
column_like_map.clear();
const auto iter = std::next(column_like.begin(), fuzz_rand() % column_like.size());
const auto ast_del = *iter;
column_like.erase(iter);
column_like_map.erase(ast_del.first);
}
const auto name = ast->formatForErrorMessage();
@ -1279,22 +1268,16 @@ void QueryFuzzer::addColumnLike(ASTPtr ast)
}
if (name.size() < 200)
{
column_like_map.insert({name, ast});
const auto res = column_like_map.insert({name, ast});
if (res.second)
{
column_like.push_back({name, ast});
}
}
}
void QueryFuzzer::collectFuzzInfoRecurse(ASTPtr ast)
{
if (auto * impl = dynamic_cast<ASTWithAlias *>(ast.get()))
{
if (aliases_set.size() > 1000)
{
aliases_set.clear();
}
aliases_set.insert(impl->alias);
}
if (typeid_cast<ASTLiteral *>(ast.get()))
{
addColumnLike(ast);

View File

@ -50,14 +50,12 @@ struct QueryFuzzer
// we are currently fuzzing. We add some part from each new query we are asked
// to fuzz, and keep this state between queries, so the fuzzing output becomes
// more interesting over time, as the queries mix.
std::unordered_set<std::string> aliases_set;
std::vector<std::string> aliases;
// The hash tables are used for collection, and the vectors are used for random access.
std::unordered_map<std::string, ASTPtr> column_like_map;
std::vector<ASTPtr> column_like;
std::vector<std::pair<std::string, ASTPtr>> column_like;
std::unordered_map<std::string, ASTPtr> table_like_map;
std::vector<ASTPtr> table_like;
std::vector<std::pair<std::string, ASTPtr>> table_like;
// Some debug fields for detecting problematic ASTs with loops.
// These are reset for each fuzzMain call.

View File

@ -274,7 +274,19 @@
M(DistrCacheUsedConnections, "Number of currently used connections to Distributed Cache") \
M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \
M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \
M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache")
M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") \
\
M(StorageConnectionsStored, "Total count of sessions stored in the session pool for storages") \
M(StorageConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for storages") \
\
M(DiskConnectionsStored, "Total count of sessions stored in the session pool for disks") \
M(DiskConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for disks") \
\
M(HTTPConnectionsStored, "Total count of sessions stored in the session pool for http hosts") \
M(HTTPConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for http hosts") \
\
M(AddressesActive, "Total count of addresses which are used for creation connections with connection pools") \
#ifdef APPLY_FOR_EXTERNAL_METRICS
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)

View File

@ -1,6 +1,7 @@
#include "DNSResolver.h"
#include <Common/CacheBase.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Common/ProfileEvents.h>
#include <Common/thread_local_rng.h>
#include <Common/logger_useful.h>
@ -108,7 +109,7 @@ DNSResolver::IPAddresses hostByName(const std::string & host)
if (addresses.empty())
{
ProfileEvents::increment(ProfileEvents::DNSError);
throw Exception(ErrorCodes::DNS_ERROR, "Not found address of host: {}", host);
throw DB::NetException(ErrorCodes::DNS_ERROR, "Not found address of host: {}", host);
}
return addresses;

View File

@ -0,0 +1,865 @@
#include <Common/HTTPConnectionPool.h>
#include <Common/HostResolvePool.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <Common/logger_useful.h>
#include <Common/Exception.h>
#include <Common/ErrorCodes.h>
#include <Common/ProxyConfiguration.h>
#include <Common/MemoryTrackerSwitcher.h>
#include <Common/SipHash.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPStream.h>
#include <Poco/Net/HTTPFixedLengthStream.h>
#include <Poco/Net/HTTPChunkedStream.h>
#include <Poco/Timespan.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/Net/HTTPRequest.h>
#include "config.h"
#if USE_SSL
#include <Poco/Net/HTTPSClientSession.h>
#endif
namespace ProfileEvents
{
extern const Event StorageConnectionsCreated;
extern const Event StorageConnectionsReused;
extern const Event StorageConnectionsReset;
extern const Event StorageConnectionsPreserved;
extern const Event StorageConnectionsExpired;
extern const Event StorageConnectionsErrors;
extern const Event StorageConnectionsElapsedMicroseconds;
extern const Event DiskConnectionsCreated;
extern const Event DiskConnectionsReused;
extern const Event DiskConnectionsReset;
extern const Event DiskConnectionsPreserved;
extern const Event DiskConnectionsExpired;
extern const Event DiskConnectionsErrors;
extern const Event DiskConnectionsElapsedMicroseconds;
extern const Event HTTPConnectionsCreated;
extern const Event HTTPConnectionsReused;
extern const Event HTTPConnectionsReset;
extern const Event HTTPConnectionsPreserved;
extern const Event HTTPConnectionsExpired;
extern const Event HTTPConnectionsErrors;
extern const Event HTTPConnectionsElapsedMicroseconds;
}
/// Gauges used by the HTTP connection pools (declared here, defined elsewhere).
/// Per connection group: `*Stored` is used as the `stored_count` metric and `*Total` as the `active_count` metric.
namespace CurrentMetrics
{
extern const Metric StorageConnectionsStored;
extern const Metric StorageConnectionsTotal;
extern const Metric DiskConnectionsStored;
extern const Metric DiskConnectionsTotal;
extern const Metric HTTPConnectionsStored;
extern const Metric HTTPConnectionsTotal;
}
namespace
{
/// Translates a DB::ProxyConfiguration into the proxy settings structure of a Poco HTTP client session.
/// Both protocol fields are converted with ProxyConfiguration::protocolToString.
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration)
{
    using DB::ProxyConfiguration;

    Poco::Net::HTTPClientSession::ProxyConfig result;

    /// Address of the proxy itself.
    result.host = proxy_configuration.host;
    result.port = proxy_configuration.port;

    /// Protocol spoken to the proxy and protocol of the original request.
    result.protocol = ProxyConfiguration::protocolToString(proxy_configuration.protocol);
    result.originalRequestProtocol = ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol);

    result.tunnel = proxy_configuration.tunneling;

    return result;
}
/// Rounds `x` up to the nearest multiple of `rounding`.
/// A value that already is a multiple of `rounding` is returned unchanged.
/// `rounding` must be positive.
size_t roundUp(size_t x, size_t rounding)
{
    chassert(rounding > 0);
    const size_t full_steps = (x + (rounding - 1)) / rounding;
    return full_steps * rounding;
}
/// Returns a time span that is `divisor` times shorter than `span`.
/// The division is an integer division performed on microseconds.
Poco::Timespan divide(const Poco::Timespan span, int divisor)
{
    const Poco::Timestamp::TimeDiff share = span.totalMicroseconds() / divisor;
    return Poco::Timespan(share);
}
}
namespace DB
{
/// Error codes thrown by this file (declared here, defined elsewhere):
/// SUPPORT_IS_DISABLED is thrown by createConnectionPool, UNSUPPORTED_URI_SCHEME by useSecureConnection.
namespace ErrorCodes
{
extern const int SUPPORT_IS_DISABLED;
extern const int UNSUPPORTED_URI_SCHEME;
}
/// Profile events and gauges reported by pools that belong to the STORAGE connection group.
IHTTPConnectionPoolForEndpoint::Metrics getMetricsForStorageConnectionPool()
{
    return {
        .created = ProfileEvents::StorageConnectionsCreated,
        .reused = ProfileEvents::StorageConnectionsReused,
        .reset = ProfileEvents::StorageConnectionsReset,
        .preserved = ProfileEvents::StorageConnectionsPreserved,
        .expired = ProfileEvents::StorageConnectionsExpired,
        .errors = ProfileEvents::StorageConnectionsErrors,
        .elapsed_microseconds = ProfileEvents::StorageConnectionsElapsedMicroseconds,
        .stored_count = CurrentMetrics::StorageConnectionsStored,
        .active_count = CurrentMetrics::StorageConnectionsTotal,
    };
}
/// Profile events and gauges reported by pools that belong to the DISK connection group.
IHTTPConnectionPoolForEndpoint::Metrics getMetricsForDiskConnectionPool()
{
    return {
        .created = ProfileEvents::DiskConnectionsCreated,
        .reused = ProfileEvents::DiskConnectionsReused,
        .reset = ProfileEvents::DiskConnectionsReset,
        .preserved = ProfileEvents::DiskConnectionsPreserved,
        .expired = ProfileEvents::DiskConnectionsExpired,
        .errors = ProfileEvents::DiskConnectionsErrors,
        .elapsed_microseconds = ProfileEvents::DiskConnectionsElapsedMicroseconds,
        .stored_count = CurrentMetrics::DiskConnectionsStored,
        .active_count = CurrentMetrics::DiskConnectionsTotal,
    };
}
/// Profile events and gauges reported by pools that belong to the HTTP connection group.
IHTTPConnectionPoolForEndpoint::Metrics getMetricsForHTTPConnectionPool()
{
    return {
        .created = ProfileEvents::HTTPConnectionsCreated,
        .reused = ProfileEvents::HTTPConnectionsReused,
        .reset = ProfileEvents::HTTPConnectionsReset,
        .preserved = ProfileEvents::HTTPConnectionsPreserved,
        .expired = ProfileEvents::HTTPConnectionsExpired,
        .errors = ProfileEvents::HTTPConnectionsErrors,
        .elapsed_microseconds = ProfileEvents::HTTPConnectionsElapsedMicroseconds,
        .stored_count = CurrentMetrics::HTTPConnectionsStored,
        .active_count = CurrentMetrics::HTTPConnectionsTotal,
    };
}
/// Returns the set of profile events and gauges that belongs to the given connection group.
/// Every enumerator of HTTPConnectionGroupType has its own case; there is intentionally no default branch
/// and no return statement after the switch.
IHTTPConnectionPoolForEndpoint::Metrics getConnectionPoolMetrics(HTTPConnectionGroupType type)
{
switch (type)
{
case HTTPConnectionGroupType::STORAGE:
return getMetricsForStorageConnectionPool();
case HTTPConnectionGroupType::DISK:
return getMetricsForDiskConnectionPool();
case HTTPConnectionGroupType::HTTP:
return getMetricsForHTTPConnectionPool();
}
}
/// Counts the connections that currently exist in one connection group (DISK, STORAGE or HTTP)
/// and compares that number with the limits configured for the group.
/// All mutable state is protected by `mutex`.
class ConnectionGroup
{
public:
    using Ptr = std::shared_ptr<ConnectionGroup>;

    explicit ConnectionGroup(HTTPConnectionGroupType type_) : type(type_), metrics(getConnectionPoolMetrics(type_)) { }

    /// Replaces the limits and re-arms the "too many sessions" warning.
    void setLimits(HTTPConnectionPools::Limits limits_)
    {
        std::lock_guard lock(mutex);
        limits = std::move(limits_);
        mute_warning_until = 0;
    }

    /// True when the number of connections in the group has reached `soft_limit`.
    bool isSoftLimitReached() const
    {
        std::lock_guard lock(mutex);
        return total_connections_in_group >= limits.soft_limit;
    }

    /// True when the number of connections in the group has reached `store_limit`.
    bool isStoreLimitReached() const
    {
        std::lock_guard lock(mutex);
        return total_connections_in_group >= limits.store_limit;
    }

    /// Must be called once for every created connection.
    /// Logs a warning when the count is at or above `warning_limit`; the warning is throttled
    /// so that it is repeated only once per `warning_step` additional connections.
    void atConnectionCreate()
    {
        std::lock_guard lock(mutex);

        ++total_connections_in_group;

        if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until)
        {
            LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit);

            /// Round up from `count + 1`, not from `count`: when the count is exactly a multiple of
            /// `warning_step`, rounding the count itself would yield the count again and the very next
            /// connection would trigger the same warning a second time.
            mute_warning_until = roundUp(total_connections_in_group + 1, limits.warning_step);
        }
    }

    /// Must be called once for every destroyed connection.
    /// Once the count drops noticeably below `warning_limit`, logs that the situation is back to normal
    /// and re-arms the warning.
    void atConnectionDestroy()
    {
        std::lock_guard lock(mutex);

        --total_connections_in_group;

        /// Hysteresis: require the count to fall 10 below the limit (or below 1 for tiny limits)
        /// before the "OK" message is printed.
        const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 10 : 1;
        if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit)
        {
            LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group);
            mute_warning_until = 0;
        }
    }

    HTTPConnectionGroupType getType() const { return type; }

    const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const { return metrics; }

private:
    const HTTPConnectionGroupType type;
    const IHTTPConnectionPoolForEndpoint::Metrics metrics;

    LoggerPtr log = getLogger("ConnectionGroup");

    mutable std::mutex mutex;
    HTTPConnectionPools::Limits limits TSA_GUARDED_BY(mutex) = HTTPConnectionPools::Limits();
    size_t total_connections_in_group TSA_GUARDED_BY(mutex) = 0;
    /// 0 means that the warning is armed; otherwise no warning is logged below this count.
    size_t mute_warning_until TSA_GUARDED_BY(mutex) = 0;
};
/// Pool interface used inside this file: the public per-endpoint pool interface
/// extended with the operations HTTPConnectionPools::Impl needs for housekeeping.
class IExtendedPool : public IHTTPConnectionPoolForEndpoint
{
public:
using Ptr = std::shared_ptr<IExtendedPool>;
/// Connection group the pool belongs to.
virtual HTTPConnectionGroupType getGroupType() const = 0;
/// Drops expired stored connections and returns how many connections are still stored.
virtual size_t wipeExpired() = 0;
};
// EndpointConnectionPool manages connections to a single endpoint.
// Features:
// - it uses HostResolver for address selection. See Common/HostResolvePool.h for more info.
// - it minimizes the number of `Session::connect()`/`Session::reconnect()` calls
// - it stores only connected and ready-to-use sessions
// - a connection can be reused even when the limits are reached
// - soft limit, warn limit, store limit
// - `Session::reconnect()` uses the pool as well
// - comprehensive sensors
// - a session is reused automatically, according to its inner state
template <class Session>
class EndpointConnectionPool : public std::enable_shared_from_this<EndpointConnectionPool<Session>>, public IExtendedPool
{
private:
friend class HTTPConnectionPools;
using WeakPtr = std::weak_ptr<EndpointConnectionPool<Session>>;
class PooledConnection : public Session
{
public:
using Ptr = std::shared_ptr<PooledConnection>;
void reconnect() override
{
ProfileEvents::increment(metrics.reset);
Session::close();
if (auto lock = pool.lock())
{
auto timeouts = getTimeouts(*this);
auto new_connection = lock->getConnection(timeouts);
Session::assign(*new_connection);
}
else
{
auto timer = CurrentThread::getProfileEvents().timer(metrics.elapsed_microseconds);
Session::reconnect();
ProfileEvents::increment(metrics.created);
}
}
String getTarget() const
{
if (!Session::getProxyConfig().host.empty())
return fmt::format("{} over proxy {}", Session::getHost(), Session::getProxyConfig().host);
return Session::getHost();
}
void flushRequest() override
{
if (bool(request_stream))
{
request_stream->flush();
if (auto * fixed_steam = dynamic_cast<Poco::Net::HTTPFixedLengthOutputStream *>(request_stream))
{
request_stream_completed = fixed_steam->isComplete();
}
else if (auto * chunked_steam = dynamic_cast<Poco::Net::HTTPChunkedOutputStream *>(request_stream))
{
chunked_steam->rdbuf()->close();
request_stream_completed = chunked_steam->isComplete();
}
else if (auto * http_stream = dynamic_cast<Poco::Net::HTTPOutputStream *>(request_stream))
{
request_stream_completed = http_stream->isComplete();
}
else
{
request_stream_completed = false;
}
}
request_stream = nullptr;
Session::flushRequest();
}
std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override
{
std::ostream & result = Session::sendRequest(request);
result.exceptions(std::ios::badbit);
request_stream = &result;
request_stream_completed = false;
response_stream = nullptr;
response_stream_completed = false;
return result;
}
std::istream & receiveResponse(Poco::Net::HTTPResponse & response) override
{
std::istream & result = Session::receiveResponse(response);
result.exceptions(std::ios::badbit);
response_stream = &result;
response_stream_completed = false;
return result;
}
void reset() override
{
request_stream = nullptr;
request_stream_completed = false;
response_stream = nullptr;
response_stream_completed = false;
Session::reset();
}
~PooledConnection() override
{
if (bool(response_stream))
{
if (auto * fixed_steam = dynamic_cast<Poco::Net::HTTPFixedLengthInputStream *>(response_stream))
{
response_stream_completed = fixed_steam->isComplete();
}
else if (auto * chunked_steam = dynamic_cast<Poco::Net::HTTPChunkedInputStream *>(response_stream))
{
response_stream_completed = chunked_steam->isComplete();
}
else if (auto * http_stream = dynamic_cast<Poco::Net::HTTPInputStream *>(response_stream))
{
response_stream_completed = http_stream->isComplete();
}
else
{
response_stream_completed = false;
}
}
response_stream = nullptr;
if (auto lock = pool.lock())
lock->atConnectionDestroy(*this);
else
ProfileEvents::increment(metrics.reset);
CurrentMetrics::sub(metrics.active_count);
}
private:
friend class EndpointConnectionPool;
template <class... Args>
explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args)
: Session(args...), pool(std::move(pool_)), metrics(std::move(metrics_))
{
CurrentMetrics::add(metrics.active_count);
}
template <class... Args>
static Ptr create(Args &&... args)
{
/// Pool is global, we shouldn't attribute this memory to query/user.
MemoryTrackerSwitcher switcher{&total_memory_tracker};
struct make_shared_enabler : public PooledConnection
{
explicit make_shared_enabler(Args &&... args) : PooledConnection(std::forward<Args>(args)...) { }
};
return std::make_shared<make_shared_enabler>(std::forward<Args>(args)...);
}
void doConnect()
{
Session::reconnect();
}
bool isCompleted() const
{
return request_stream_completed && response_stream_completed;
}
WeakPtr pool;
IHTTPConnectionPoolForEndpoint::Metrics metrics;
Poco::Logger * log = &Poco::Logger::get("PooledConnection");
std::ostream * request_stream = nullptr;
std::istream * response_stream = nullptr;
bool request_stream_completed = true;
bool response_stream_completed = true;
};
using Connection = PooledConnection;
using ConnectionPtr = PooledConnection::Ptr;
/// Comparator for the heap of stored connections. Connections are ordered by
/// "time of the last request + keep-alive timeout"; `l` goes after `r` when its value is greater.
struct GreaterByLastRequest
{
    static bool operator()(const ConnectionPtr & l, const ConnectionPtr & r)
    {
        const auto l_deadline = l->getLastRequest() + l->getKeepAliveTimeout();
        const auto r_deadline = r->getLastRequest() + r->getKeepAliveTimeout();
        return l_deadline > r_deadline;
    }
};
using ConnectionsMinHeap = std::priority_queue<ConnectionPtr, std::vector<ConnectionPtr>, GreaterByLastRequest>;
public:
/// Factory: the only way to obtain a pool, always owned by a shared pointer.
template <class... Args>
static Ptr create(Args &&... args)
{
    /// The constructor is private, so std::make_shared cannot call it directly;
    /// a local derived type with a public constructor can.
    struct PublicConstructor : public EndpointConnectionPool<Session>
    {
        explicit PublicConstructor(Args &&... ctor_args) : EndpointConnectionPool<Session>(std::forward<Args>(ctor_args)...) { }
    };

    return std::make_shared<PublicConstructor>(std::forward<Args>(args)...);
}
~EndpointConnectionPool() override
{
    /// The connections that are still stored go away together with the pool: take them out of the gauge.
    const auto & group_metrics = group->getMetrics();
    CurrentMetrics::sub(group_metrics.stored_count, stored_connections.size());
}
/// Human-readable description of the endpoint, mentioning the proxy when one is configured.
String getTarget() const
{
    const bool via_proxy = !proxy_configuration.isEmpty();
    return via_proxy ? fmt::format("{} over proxy {}", host, proxy_configuration.host) : host;
}
/// Returns a connection to the endpoint with `timeouts` applied.
/// Expired stored connections are wiped first; if a stored connection remains, the one on top of the heap
/// is reused, otherwise a new connection is created and connected.
IHTTPConnectionPoolForEndpoint::ConnectionPtr getConnection(const ConnectionTimeouts & timeouts) override
{
Poco::Timestamp now;
std::vector<ConnectionPtr> expired_connections;
/// Registered before the locked scope below, so the expired connections are destroyed
/// only after `mutex` has been released, with the memory tracker switched to `total_memory_tracker`.
SCOPE_EXIT({
MemoryTrackerSwitcher switcher{&total_memory_tracker};
expired_connections.clear();
});
{
std::lock_guard lock(mutex);
wipeExpiredImpl(expired_connections, now);
if (!stored_connections.empty())
{
auto it = stored_connections.top();
stored_connections.pop();
setTimeouts(*it, timeouts);
ProfileEvents::increment(getMetrics().reused, 1);
CurrentMetrics::sub(getMetrics().stored_count, 1);
return it;
}
}
/// Nothing to reuse: connect outside of the lock.
return prepareNewConnection(timeouts);
}
/// The pool has no metrics of its own: it reports into the metrics of its connection group.
const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const override
{
return group->getMetrics();
}
/// Type of the connection group this pool was created for.
HTTPConnectionGroupType getGroupType() const override
{
return group->getType();
}
/// Removes expired connections from the pool and returns the number of connections that stay stored.
size_t wipeExpired() override
{
Poco::Timestamp now;
std::vector<ConnectionPtr> expired_connections;
/// Registered before the lock is taken, so it runs after the lock is released:
/// the expired connections are destroyed without holding `mutex`.
SCOPE_EXIT({
MemoryTrackerSwitcher switcher{&total_memory_tracker};
expired_connections.clear();
});
std::lock_guard lock(mutex);
return wipeExpiredImpl(expired_connections, now);
}
/// Moves expired connections from the top of the heap into `expired_connections` and stops at the first
/// connection that is not expired. The connections are only collected here; the caller destroys them
/// after `mutex` is released.
/// Returns the number of connections that stay stored.
size_t wipeExpiredImpl(std::vector<ConnectionPtr> & expired_connections, Poco::Timestamp now) TSA_REQUIRES(mutex)
{
    const size_t collected_before = expired_connections.size();

    while (!stored_connections.empty())
    {
        auto connection = stored_connections.top();

        /// Leave the loop instead of returning: the metrics below have to be updated
        /// for the connections that were already popped, even when some connections stay stored.
        if (!isExpired(now, connection))
            break;

        stored_connections.pop();
        expired_connections.push_back(std::move(connection));
    }

    /// Account for exactly the connections removed by this call.
    const size_t wiped = expired_connections.size() - collected_before;
    CurrentMetrics::sub(getMetrics().stored_count, wiped);
    ProfileEvents::increment(getMetrics().expired, wiped);

    return stored_connections.size();
}
private:
/// Private: pools are created through `create()` only.
/// All by-value arguments are sinks and are moved into the members.
EndpointConnectionPool(ConnectionGroup::Ptr group_, String host_, UInt16 port_, bool https_, ProxyConfiguration proxy_configuration_)
    : host(std::move(host_))
    , port(port_)
    , https(https_)
    , proxy_configuration(std::move(proxy_configuration_))
    , group(std::move(group_))
{
}
/// Weak reference to this pool; handed to every PooledConnection created by allocateNewConnection().
WeakPtr getWeakFromThis() { return EndpointConnectionPool::weak_from_this(); }
bool isExpired(Poco::Timestamp & now, ConnectionPtr connection)
{
if (group->isSoftLimitReached())
return now > (connection->getLastRequest() + divide(connection->getKeepAliveTimeout(), 10));
return now > connection->getLastRequest() + connection->getKeepAliveTimeout();
}
/// Creates a session object for this endpoint (keep-alive enabled, proxy applied when configured)
/// and registers it in the connection group. The session is not connected here.
ConnectionPtr allocateNewConnection()
{
    auto new_session = PooledConnection::create(this->getWeakFromThis(), getMetrics(), host, port);
    new_session->setKeepAlive(true);

    const bool has_proxy = !proxy_configuration.isEmpty();
    if (has_proxy)
        new_session->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration));

    group->atConnectionCreate();

    return new_session;
}
/// Creates a brand-new connected session: resolves the host, applies `timeouts`,
/// pins the session to the resolved address and connects.
/// If connecting throws, the resolved address is marked as failed, the `errors` event is incremented,
/// the session is reset and the exception is rethrown.
ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts)
{
auto address = HostResolversPool::instance().getResolver(host)->resolve();
auto session = allocateNewConnection();
setTimeouts(*session, timeouts);
session->setResolvedHost(*address);
try
{
/// The timer object lives only inside this block, i.e. around the connect call.
auto timer = CurrentThread::getProfileEvents().timer(getMetrics().elapsed_microseconds);
session->doConnect();
}
catch (...)
{
address.setFail();
ProfileEvents::increment(getMetrics().errors);
session->reset();
throw;
}
ProfileEvents::increment(getMetrics().created);
return session;
}
/// Called from ~PooledConnection while the pool is still alive.
/// Unregisters the connection from the group and decides whether its underlying connection is worth keeping.
void atConnectionDestroy(PooledConnection & connection)
{
group->atConnectionDestroy();
/// Do not keep the connection if it is not connected, must be reconnected, the last exchange was not finished,
/// there is buffered data, or the group already holds too many connections.
if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered()
|| group->isStoreLimitReached())
{
ProfileEvents::increment(getMetrics().reset, 1);
return;
}
/// `connection` itself is being destroyed, so its state is assigned to a freshly allocated session,
/// and that new object is what gets stored.
auto connection_to_store = allocateNewConnection();
connection_to_store->assign(connection);
CurrentMetrics::add(getMetrics().stored_count, 1);
ProfileEvents::increment(getMetrics().preserved, 1);
{
MemoryTrackerSwitcher switcher{&total_memory_tracker};
std::lock_guard lock(mutex);
stored_connections.push(connection_to_store);
}
}
const std::string host;
const UInt16 port;
const bool https;
const ProxyConfiguration proxy_configuration;
const ConnectionGroup::Ptr group;
std::mutex mutex;
ConnectionsMinHeap stored_connections TSA_GUARDED_BY(mutex);
};
/// Key of the map of per-endpoint pools: connection group + target endpoint + proxy settings.
struct EndpointPoolKey
{
    HTTPConnectionGroupType connection_group;
    String target_host;
    UInt16 target_port;
    bool is_target_https;
    ProxyConfiguration proxy_config;

    /// Two keys are equal when all the fields listed below are equal.
    bool operator==(const EndpointPoolKey & rhs) const
    {
        auto compared_fields = [](const EndpointPoolKey & key)
        {
            return std::tie(
                key.connection_group,
                key.target_host,
                key.target_port,
                key.is_target_https,
                key.proxy_config.host,
                key.proxy_config.port,
                key.proxy_config.protocol,
                key.proxy_config.tunneling,
                key.proxy_config.original_request_protocol);
        };

        return compared_fields(*this) == compared_fields(rhs);
    }
};
struct Hasher
{
size_t operator()(const EndpointPoolKey & k) const
{
SipHash s;
s.update(k.connection_group);
s.update(k.target_host);
s.update(k.target_port);
s.update(k.is_target_https);
s.update(k.proxy_config.host);
s.update(k.proxy_config.port);
s.update(k.proxy_config.protocol);
s.update(k.proxy_config.tunneling);
s.update(k.proxy_config.original_request_protocol);
return s.get64();
}
};
/// Creates the pool for one endpoint. The session type depends on `secure`:
/// HTTPSClientSession for secure connections (only in builds with SSL), HTTPClientSession otherwise.
/// Throws SUPPORT_IS_DISABLED when a secure pool is requested in a build without SSL.
IExtendedPool::Ptr
createConnectionPool(ConnectionGroup::Ptr group, std::string host, UInt16 port, bool secure, ProxyConfiguration proxy_configuration)
{
    if (secure)
    {
#if USE_SSL
        return EndpointConnectionPool<Poco::Net::HTTPSClientSession>::create(
            std::move(group), std::move(host), port, secure, std::move(proxy_configuration));
#else
        /// The message names what is actually unavailable: HTTPS sessions.
        throw Exception(
            ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS connections are not supported, because ClickHouse was built without SSL library");
#endif
    }
    else
    {
        return EndpointConnectionPool<Poco::Net::HTTPClientSession>::create(
            std::move(group), std::move(host), port, secure, std::move(proxy_configuration));
    }
}
/// Implementation of HTTPConnectionPools: owns the three connection groups and
/// the map from endpoint key to the pool of connections for that endpoint.
class HTTPConnectionPools::Impl
{
private:
    const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 5 * 60;
    /// Minimal interval between two sweeps over all pools (see getPool).
    const Poco::Timespan wipe_timeout = Poco::Timespan(DEFAULT_WIPE_TIMEOUT_SECONDS, 0);

    ConnectionGroup::Ptr disk_group = std::make_shared<ConnectionGroup>(HTTPConnectionGroupType::DISK);
    ConnectionGroup::Ptr storage_group = std::make_shared<ConnectionGroup>(HTTPConnectionGroupType::STORAGE);
    ConnectionGroup::Ptr http_group = std::make_shared<ConnectionGroup>(HTTPConnectionGroupType::HTTP);

    /// If multiple mutexes are held simultaneously,
    /// they should be locked in this order:
    /// HTTPConnectionPools::mutex, then EndpointConnectionPool::mutex, then ConnectionGroup::mutex.
    std::mutex mutex;

    std::unordered_map<EndpointPoolKey, IExtendedPool::Ptr, Hasher> endpoints_pool TSA_GUARDED_BY(mutex);
    Poco::Timestamp last_wipe_time TSA_GUARDED_BY(mutex);

public:
    /// Returns the pool for the endpoint described by `uri` and `proxy_configuration` in the group `type`,
    /// creating it on first use. If more than `wipe_timeout` has passed since the previous sweep,
    /// expired connections are wiped from all pools first.
    IHTTPConnectionPoolForEndpoint::Ptr getPool(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration)
    {
        Poco::Timestamp now;

        std::lock_guard lock(mutex);

        if (now - last_wipe_time > wipe_timeout)
        {
            wipeExpired();
            last_wipe_time = now;
        }

        return getPoolImpl(type, uri, proxy_configuration);
    }

    /// Sets the limits of the three connection groups.
    void setLimits(HTTPConnectionPools::Limits disk, HTTPConnectionPools::Limits storage, HTTPConnectionPools::Limits http)
    {
        disk_group->setLimits(std::move(disk));
        storage_group->setLimits(std::move(storage));
        http_group->setLimits(std::move(http));
    }

    /// Forgets all per-endpoint pools.
    void dropCache()
    {
        std::lock_guard lock(mutex);
        endpoints_pool.clear();
    }

protected:
    ConnectionGroup::Ptr & getGroup(HTTPConnectionGroupType type)
    {
        switch (type)
        {
            case HTTPConnectionGroupType::DISK:
                return disk_group;
            case HTTPConnectionGroupType::STORAGE:
                return storage_group;
            case HTTPConnectionGroupType::HTTP:
                return http_group;
        }
    }

    /// Finds the pool for the endpoint or creates and registers a new one.
    IExtendedPool::Ptr getPoolImpl(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration)
        TSA_REQUIRES(mutex)
    {
        auto [host, port, secure] = getHostPortSecure(uri, proxy_configuration);
        auto key = EndpointPoolKey{type, host, port, secure, proxy_configuration};

        auto it = endpoints_pool.find(key);
        if (it != endpoints_pool.end())
            return it->second;

        it = endpoints_pool.emplace(key, createConnectionPool(getGroup(type), std::move(host), port, secure, proxy_configuration)).first;

        return it->second;
    }

    /// Wipes expired connections in every pool. A pool that is left without stored connections is dropped
    /// from the map, except for pools of the DISK group, which are always kept.
    void wipeExpired() TSA_REQUIRES(mutex)
    {
        /// Erase in place: no temporary copies of the keys and no second lookup per erased pool.
        /// The predicate is applied once to every element.
        std::erase_if(endpoints_pool, [](const auto & entry)
        {
            const auto & pool = entry.second;
            const auto left_connections = pool->wipeExpired();
            return left_connections == 0 && pool->getGroupType() != HTTPConnectionGroupType::DISK;
        });
    }

    /// Decides whether the session to the next hop has to be an HTTPS session.
    /// Throws UNSUPPORTED_URI_SCHEME for schemes other than "http" and "https".
    static bool useSecureConnection(const Poco::URI & uri, const ProxyConfiguration & proxy_configuration)
    {
        if (uri.getScheme() == "http")
            return false;

        if (uri.getScheme() != "https")
            throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString());

        if (!proxy_configuration.isEmpty())
        {
            if (ProxyConfiguration::Protocol::HTTP == proxy_configuration.protocol && !proxy_configuration.tunneling)
            {
                // If it is an HTTPS request, proxy server is HTTP and user opted for tunneling off, we must not create an HTTPS request.
                // The desired flow is: HTTP request to the proxy server, then proxy server will initiate an HTTPS request to the target server.
                // There is a weak link in the security, but that's what the user opted for.
                return false;
            }
        }

        return true;
    }

    static std::tuple<std::string, UInt16, bool> getHostPortSecure(const Poco::URI & uri, const ProxyConfiguration & proxy_configuration)
    {
        return std::make_tuple(uri.getHost(), uri.getPort(), useSecureConnection(uri, proxy_configuration));
    }
};
/// Creates the implementation object; all other methods delegate to it.
HTTPConnectionPools::HTTPConnectionPools()
: impl(std::make_unique<HTTPConnectionPools::Impl>())
{
}
/// Returns the single process-wide instance, created on the first call.
HTTPConnectionPools & HTTPConnectionPools::instance()
{
static HTTPConnectionPools instance;
return instance;
}
/// Sets the limits of the DISK, STORAGE and HTTP connection groups (forwarded to Impl::setLimits).
void HTTPConnectionPools::setLimits(HTTPConnectionPools::Limits disk, HTTPConnectionPools::Limits storage, HTTPConnectionPools::Limits http)
{
impl->setLimits(std::move(disk), std::move(storage), std::move(http));
}
/// Forgets all per-endpoint pools (forwarded to Impl::dropCache).
void HTTPConnectionPools::dropCache()
{
impl->dropCache();
}
/// Returns the connection pool for the endpoint given by `uri` and `proxy_configuration`
/// within the connection group `type` (forwarded to Impl::getPool).
IHTTPConnectionPoolForEndpoint::Ptr
HTTPConnectionPools::getPool(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration)
{
return impl->getPool(type, uri, proxy_configuration);
}
}

View File

@ -0,0 +1,91 @@
#pragma once
#include <IO/ConnectionTimeouts.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <Common/ProxyConfiguration.h>
#include <Common/logger_useful.h>
#include <base/defines.h>
#include <Poco/Timespan.h>
#include <Poco/Net/HTTPClientSession.h>
#include <mutex>
#include <memory>
namespace DB
{
/// Interface of a pool of HTTP connections to one endpoint.
/// Instances are handled through shared pointers (`Ptr`) and cannot be copied.
class IHTTPConnectionPoolForEndpoint
{
public:
/// Identifiers of the profile events and gauges a pool reports into.
/// Every field defaults to the corresponding `end()` value.
struct Metrics
{
const ProfileEvents::Event created = ProfileEvents::end();
const ProfileEvents::Event reused = ProfileEvents::end();
const ProfileEvents::Event reset = ProfileEvents::end();
const ProfileEvents::Event preserved = ProfileEvents::end();
const ProfileEvents::Event expired = ProfileEvents::end();
const ProfileEvents::Event errors = ProfileEvents::end();
const ProfileEvents::Event elapsed_microseconds = ProfileEvents::end();
const CurrentMetrics::Metric stored_count = CurrentMetrics::end();
const CurrentMetrics::Metric active_count = CurrentMetrics::end();
};
using Ptr = std::shared_ptr<IHTTPConnectionPoolForEndpoint>;
using Connection = Poco::Net::HTTPClientSession;
using ConnectionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
/// Returns a connection to the endpoint with `timeouts` applied.
/// can throw Poco::Net::Exception, DB::NetException, DB::Exception
virtual ConnectionPtr getConnection(const ConnectionTimeouts & timeouts) = 0;
/// Metrics this pool reports into.
virtual const Metrics & getMetrics() const = 0;
virtual ~IHTTPConnectionPoolForEndpoint() = default;
protected:
IHTTPConnectionPoolForEndpoint() = default;
IHTTPConnectionPoolForEndpoint(const IHTTPConnectionPoolForEndpoint &) = delete;
IHTTPConnectionPoolForEndpoint & operator=(const IHTTPConnectionPoolForEndpoint &) = delete;
};
/// Connection groups. Each group has its own limits (see HTTPConnectionPools::setLimits).
enum class HTTPConnectionGroupType
{
DISK,
STORAGE,
HTTP,
};
/// Singleton that hands out per-endpoint connection pools.
/// The implementation is hidden behind `Impl`.
class HTTPConnectionPools
{
public:
/// Limits of one connection group; all of them are numbers of connections.
struct Limits
{
size_t soft_limit = 100;
size_t warning_limit = 1000;
size_t store_limit = 10000;
static constexpr size_t warning_step = 100;
};
private:
/// Private constructor and deleted copying: use instance().
HTTPConnectionPools();
HTTPConnectionPools(const HTTPConnectionPools &) = delete;
HTTPConnectionPools & operator=(const HTTPConnectionPools &) = delete;
public:
static HTTPConnectionPools & instance();
/// Sets the limits of the DISK, STORAGE and HTTP groups, in this order.
void setLimits(Limits disk, Limits storage, Limits http);
void dropCache();
/// Returns the pool for the endpoint given by `uri` and `proxy_configuration` in the group `type`.
IHTTPConnectionPoolForEndpoint::Ptr getPool(HTTPConnectionGroupType type, const Poco::URI & uri, const ProxyConfiguration & proxy_configuration);
private:
class Impl;
std::unique_ptr<Impl> impl;
};
}

View File

@ -0,0 +1,298 @@
#include <Common/HostResolvePool.h>
#include <Common/DNSResolver.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Common/ErrorCodes.h>
#include <Common/thread_local_rng.h>
#include <Common/MemoryTrackerSwitcher.h>
#include <mutex>
/// Profile events reported by HostResolver (declared here, defined elsewhere); see HostResolver::getMetrics.
namespace ProfileEvents
{
extern const Event AddressesDiscovered;
extern const Event AddressesExpired;
extern const Event AddressesMarkedAsFailed;
}
/// Gauge reported by HostResolver (declared here, defined elsewhere); used as `active_count`.
namespace CurrentMetrics
{
extern const Metric AddressesActive;
}
namespace DB
{
/// Error code thrown by this file when a host resolves to no addresses (declared here, defined elsewhere).
namespace ErrorCodes
{
extern const int DNS_ERROR;
}
/// Identifiers of the profile events and the gauge used by host resolvers.
HostResolverMetrics HostResolver::getMetrics()
{
    return {
        .discovered = ProfileEvents::AddressesDiscovered,
        .expired = ProfileEvents::AddressesExpired,
        .failed = ProfileEvents::AddressesMarkedAsFailed,
        .active_count = CurrentMetrics::AddressesActive,
    };
}
/// Weak reference to this resolver; stored in every Entry so that an entry does not keep the resolver alive.
HostResolver::WeakPtr HostResolver::getWeakFromThis()
{
return weak_from_this();
}
HostResolver::HostResolver(String host_, Poco::Timespan history_)
: host(std::move(host_))
, history(history_)
, resolve_function([](const String & host_to_resolve) { return DNSResolver::instance().resolveHostAll(host_to_resolve); })
{
update();
}
/// Creates a resolver that uses a custom resolve function.
/// The host is resolved immediately: `update()` is called from the constructor.
HostResolver::HostResolver(
ResolveFunction && resolve_function_, String host_, Poco::Timespan history_)
: host(std::move(host_)), history(history_), resolve_function(std::move(resolve_function_))
{
update();
}
HostResolver::~HostResolver()
{
    /// Same steps as reset(): under the mutex, take the known addresses out of the gauge and drop them.
    reset();
}
/// Marks the entry as failed and reports the failed address to the resolver, if it still exists.
void HostResolver::Entry::setFail()
{
    fail = true;

    auto resolver = pool.lock();
    if (!resolver)
        return;

    resolver->setFail(address);
}
/// An entry that was never marked as failed reports a success to the resolver, if it still exists.
HostResolver::Entry::~Entry()
{
    if (fail)
        return;

    if (auto resolver = pool.lock())
        resolver->setSuccess(address);
}
/// Resolves the host again and merges the result into the known records.
/// Throws NetException(DNS_ERROR) when the resolve function returns no addresses.
void HostResolver::update()
{
    MemoryTrackerSwitcher switcher{&total_memory_tracker};

    /// Resolve and sort outside of the lock.
    auto resolved_addresses = resolve_function(host);
    if (resolved_addresses.empty())
        throw NetException(ErrorCodes::DNS_ERROR, "no endpoints resolved for host {}", host);

    std::sort(resolved_addresses.begin(), resolved_addresses.end());

    Poco::Timestamp now;

    std::lock_guard lock(mutex);
    updateImpl(now, resolved_addresses);
}
/// Forgets all known addresses and takes them out of the `active_count` gauge.
/// Afterwards `isUpdateNeeded()` is true because `records` is empty.
void HostResolver::reset()
{
std::lock_guard lock(mutex);
CurrentMetrics::sub(metrics.active_count, records.size());
records.clear();
}
/// Recomputes the prefix sums of the weights. If there are records but their total weight is zero,
/// the `failed` flag is cleared on all records and the weights are computed once more.
void HostResolver::updateWeights()
{
    updateWeightsImpl();

    const bool nothing_selectable = getTotalWeight() == 0 && !records.empty();
    if (nothing_selectable)
    {
        for (auto & record : records)
            record.failed = false;

        updateWeightsImpl();
    }

    chassert(records.empty() || getTotalWeight() > 0);
}
/// Returns an entry for one of the known addresses, chosen by `selectBest()`.
/// The host is resolved again first when `isUpdateNeeded()` says so; that update happens before the lock is taken.
HostResolver::Entry HostResolver::resolve()
{
if (isUpdateNeeded())
update();
std::lock_guard lock(mutex);
return Entry(*this, selectBest());
}
/// Counts one more usage of `address`. Unknown addresses are ignored.
/// The prefix sums are recomputed only when the weight of the record has changed.
void HostResolver::setSuccess(const Poco::Net::IPAddress & address)
{
    std::lock_guard lock(mutex);

    auto record = find(address);
    if (record == records.end())
        return;

    const auto weight_before = record->getWeight();
    ++record->usage;

    if (record->getWeight() != weight_before)
        updateWeights();
}
/// Marks `address` as failed at the current time and resolves the host again.
/// Unknown addresses are ignored (no event, no new resolve).
void HostResolver::setFail(const Poco::Net::IPAddress & address)
{
Poco::Timestamp now;
{
std::lock_guard lock(mutex);
auto it = find(address);
if (it == records.end())
return;
it->failed = true;
it->fail_time = now;
}
/// The lock is released here: update() takes the mutex itself.
ProfileEvents::increment(metrics.failed);
update();
}
/// Picks an address at random, proportionally to the weights: draws a number in [0, total weight)
/// and returns the first record whose prefix sum of weights is greater than that number.
Poco::Net::IPAddress HostResolver::selectBest()
{
    chassert(!records.empty());

    std::uniform_int_distribution<size_t> pick_weight(0, getTotalWeight() - 1);
    const size_t picked = pick_weight(thread_local_rng);

    auto chosen = std::upper_bound(
        records.begin(), records.end(), picked,
        [](size_t value, const Record & rec) { return value < rec.weight_prefix_sum; });

    chassert(chosen != records.end());
    return chosen->address;
}
/// Binary search for the record with exactly this address.
/// Returns `records.end()` when there is no such record.
HostResolver::Records::iterator HostResolver::find(const Poco::Net::IPAddress & addr) TSA_REQUIRES(mutex)
{
    const auto address_less = [](const Record & rec, const Poco::Net::IPAddress & value) { return rec.address < value; };
    auto candidate = std::lower_bound(records.begin(), records.end(), addr, address_less);

    /// lower_bound only finds the position where the address would be; check that it really is there.
    if (candidate == records.end() || candidate->address != addr)
        return records.end();

    return candidate;
}
bool HostResolver::isUpdateNeeded()
{
Poco::Timestamp now;
std::lock_guard lock(mutex);
return last_resolve_time + history < now || records.empty();
}
/// Merges the freshly resolved addresses `next_gen` into `records`.
/// Both sequences are walked in parallel in address order (the merged result is checked with
/// `chassert(std::is_sorted(...))`), which gives three cases per address:
///  - known, but not resolved this time: kept while its last resolve is within `history`, otherwise expired;
///  - resolved this time, but not known: added as a new record;
///  - known and resolved again: kept, its resolve time is refreshed.
/// Afterwards, `failed` flags older than `history` are cleared and the weights are recomputed.
/// Throws NetException(DNS_ERROR) if no records are left.
void HostResolver::updateImpl(Poco::Timestamp now, std::vector<Poco::Net::IPAddress> & next_gen)
TSA_REQUIRES(mutex)
{
/// Records resolved earlier than this moment are too old to keep.
const auto last_effective_resolve = now - history;
Records merged;
merged.reserve(records.size() + next_gen.size());
auto it_before = records.begin();
auto it_next = next_gen.begin();
while (it_before != records.end() || it_next != next_gen.end())
{
/// The address exists only in the old records.
if (it_next == next_gen.end() || (it_before != records.end() && it_before->address < *it_next))
{
if (it_before->resolve_time >= last_effective_resolve)
merged.push_back(*it_before);
else
{
CurrentMetrics::sub(metrics.active_count, 1);
ProfileEvents::increment(metrics.expired, 1);
}
++it_before;
}
/// The address exists only in the new resolve result.
else if (it_before == records.end() || (it_next != next_gen.end() && *it_next < it_before->address))
{
CurrentMetrics::add(metrics.active_count, 1);
ProfileEvents::increment(metrics.discovered, 1);
merged.push_back(Record(*it_next, now));
++it_next;
}
/// The address exists in both: keep the old record (with its state) and refresh its resolve time.
else
{
merged.push_back(*it_before);
merged.back().resolve_time = now;
++it_before;
++it_next;
}
}
/// A failure that happened longer than `history` ago is forgiven.
for (auto & rec : merged)
if (rec.failed && rec.fail_time < last_effective_resolve)
rec.failed = false;
chassert(std::is_sorted(merged.begin(), merged.end()));
last_resolve_time = now;
records.swap(merged);
if (records.empty())
throw NetException(ErrorCodes::DNS_ERROR, "no endpoints resolved for host {}", host);
updateWeights();
}
/// Sum of the weights of all records: the prefix sum stored in the last record, or 0 without records.
size_t HostResolver::getTotalWeight() const
{
    return records.empty() ? 0 : records.back().weight_prefix_sum;
}
void HostResolver::updateWeightsImpl()
{
size_t total_weight_next = 0;
for (auto & rec: records)
{
total_weight_next += rec.getWeight();
rec.weight_prefix_sum = total_weight_next;
}
}
/// Returns the single process-wide instance, created on the first call.
HostResolversPool & HostResolversPool::instance()
{
static HostResolversPool instance;
return instance;
}
/// Forgets all resolvers; the next getResolver() call for a host creates a new one.
void HostResolversPool::dropCache()
{
std::lock_guard lock(mutex);
host_pools.clear();
}
/// Returns the resolver for `host`, creating and registering it on first use.
/// Note that the resolver is created while the pool mutex is held.
HostResolver::Ptr HostResolversPool::getResolver(const String & host)
{
    std::lock_guard lock(mutex);

    if (auto found = host_pools.find(host); found != host_pools.end())
        return found->second;

    return host_pools.emplace(host, HostResolver::create(host)).first->second;
}
}

View File

@ -0,0 +1,216 @@
#pragma once
#include <Common/CurrentMetrics.h>
#include <Common/HTTPConnectionPool.h>
#include <Common/ProfileEvents.h>
#include <Common/logger_useful.h>
#include <base/defines.h>
#include <Poco/Net/IPAddress.h>
#include <mutex>
#include <memory>
// This class resolves a host into multiple addresses.
// Features:
// - balance address usage.
// `selectBest()` chooses the address at random, with weights.
// The more an IP is used, the lower its weight. When a new address appears, it gets more weight.
// But still not all requests are assigned to the new address.
// - join resolve results
// When the host is resolved into different sets of addresses, this class joins all those addresses and uses them.
// An address expires after `history_` time.
// - failed address pessimization
// If an address is marked with `setFail()`, it is considered faulty. Such an address won't be selected until either
// a) it still occurs in the resolve set after `history_` time or b) all other addresses are pessimized as well.
// - resolve schedule
// Addresses are resolved through `DB::DNSResolver::instance()`.
// Usually this does not happen more often than once per `history_` time.
// But a new resolve is also performed on each `setFail()` call.
namespace DB
{
/// Identifiers of the profile events and the gauge a host resolver reports into.
/// Every field defaults to the corresponding `end()` value.
struct HostResolverMetrics
{
const ProfileEvents::Event discovered = ProfileEvents::end();
const ProfileEvents::Event expired = ProfileEvents::end();
const ProfileEvents::Event failed = ProfileEvents::end();
const CurrentMetrics::Metric active_count = CurrentMetrics::end();
};
/// Default value of HostResolver's `history_` parameter, in seconds (2 minutes).
constexpr size_t DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2*60;
class HostResolver : public std::enable_shared_from_this<HostResolver>
{
private:
using WeakPtr = std::weak_ptr<HostResolver>;
public:
using Ptr = std::shared_ptr<HostResolver>;
/// Factory: returns a shared pointer to a new resolver, forwarding `args` to a HostResolver constructor.
/// The HostResolver constructors are declared `protected`, so std::make_shared cannot call them directly.
/// The local struct derives from HostResolver and provides a public constructor that forwards to them.
template<class... Args>
static Ptr create(Args&&... args)
{
struct make_shared_enabler : public HostResolver
{
explicit make_shared_enabler(Args&&... args) : HostResolver(std::forward<Args>(args)...) {}
};
return std::make_shared<make_shared_enabler>(std::forward<Args>(args)...);
}
virtual ~HostResolver();
/// Handle to one resolved address of the host; this is the type returned by `HostResolver::resolve()`.
/// It stores the address, its textual form and a weak reference to the resolver that produced it.
class Entry
{
public:
/// Move construction is defaulted; construction from an lvalue Entry is deleted.
explicit Entry(Entry && entry) = default;
explicit Entry(Entry & entry) = delete;
// no access as r-value
/// The overloads for temporaries are deleted, so a pointer or reference into a temporary Entry cannot be obtained.
const String * operator->() && = delete;
const String * operator->() const && = delete;
const String & operator*() && = delete;
const String & operator*() const && = delete;
/// Accessors for the textual form of the address (`resolved_host`); available on lvalues only.
const String * operator->() & { return &resolved_host; }
const String * operator->() const & { return &resolved_host; }
const String & operator*() & { return resolved_host; }
const String & operator*() const & { return resolved_host; }
/// Defined out of line. NOTE(review): presumably reports this address to the resolver as faulty — confirm in the .cpp.
void setFail();
/// Defined out of line.
~Entry();
private:
friend class HostResolver;
/// Private: only HostResolver (a friend) creates entries.
/// `resolved_host` is initialized from the member `address`, which relies on `address` being declared before it.
Entry(HostResolver & pool_, Poco::Net::IPAddress address_)
: pool(pool_.getWeakFromThis())
, address(std::move(address_))
, resolved_host(address.toString())
{ }
/// Weak reference to the resolver, obtained through `getWeakFromThis()`.
HostResolver::WeakPtr pool;
const Poco::Net::IPAddress address;
/// Result of `address.toString()`, computed once at construction.
const String resolved_host;
/// NOTE(review): presumably set by `setFail()` — confirm in the .cpp.
bool fail = false;
};
/// can throw NetException(ErrorCodes::DNS_ERROR, ...), Exception(ErrorCodes::BAD_ARGUMENTS, ...)
/// Returns an Entry that holds one of the addresses of the host.
Entry resolve();
/// NOTE(review): presumably re-resolves the host and merges the result into the known addresses — confirm in the .cpp.
void update();
/// NOTE(review): presumably drops the accumulated addresses — confirm in the .cpp.
void reset();
/// Returns the metric identifiers used by resolvers; also used to initialize the `metrics` member.
static HostResolverMetrics getMetrics();
protected:
/// Constructors are protected; `create()` is the public way to obtain an instance.
explicit HostResolver(
String host_,
Poco::Timespan history_ = Poco::Timespan(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS, 0));
/// Signature of a function that maps a host name to its addresses.
using ResolveFunction = std::function<std::vector<Poco::Net::IPAddress> (const String & host)>;
/// Same as above, but with an explicitly supplied resolve function.
HostResolver(ResolveFunction && resolve_function_,
String host_,
Poco::Timespan history_);
friend class Entry;
/// Used by Entry's constructor to obtain a weak reference to this resolver.
WeakPtr getWeakFromThis();
/// Per-address outcome reporting. NOTE(review): presumably called by Entry — confirm in the .cpp.
void setSuccess(const Poco::Net::IPAddress & address);
void setFail(const Poco::Net::IPAddress & address);
/// Bookkeeping for a single address of the host: when it was resolved, how often it was used,
/// whether it is currently marked as failed, and its selection weight.
struct Record
{
    Record(Poco::Net::IPAddress address_, Poco::Timestamp resolve_time_)
        : address(std::move(address_))
        , resolve_time(resolve_time_)
    {}

    explicit Record(Record && rec) = default;
    Record& operator=(Record && s) = default;

    explicit Record(const Record & rec) = default;
    Record& operator=(const Record & s) = default;

    Poco::Net::IPAddress address;
    Poco::Timestamp resolve_time;
    size_t usage = 0;
    bool failed = false;
    Poco::Timestamp fail_time = 0;

    /// Initialized explicitly so that a freshly constructed record never carries an indeterminate value
    /// before the weights are computed for the first time.
    size_t weight_prefix_sum = 0;

    /// Records are ordered by address only.
    bool operator <(const Record & r) const
    {
        return address < r.address;
    }

    /// Selection weight of the address: 0 for a failed address, otherwise 10, 8, 5 or 1,
    /// decreasing as `usage` passes 100, 1000 and 10000.
    size_t getWeight() const
    {
        if (failed)
            return 0;

        /// There is no goal to make usage's distribution ideally even
        /// The goal is to chose more often new address, but still use old addresses as well
        /// when all addresses have usage counter greater than 10000,
        /// no more corrections are needed, just random choice is ok
        if (usage > 10000)
            return 1;
        if (usage > 1000)
            return 5;
        if (usage > 100)
            return 8;
        return 10;
    }
};
using Records = std::vector<Record>;
/// The helpers annotated with TSA_REQUIRES(mutex) must be called with `mutex` held.
/// Chooses the address to hand out (at random with weights, per the description at the top of this file).
Poco::Net::IPAddress selectBest() TSA_REQUIRES(mutex);
/// Looks up the record of `address` in `records`.
Records::iterator find(const Poco::Net::IPAddress & address) TSA_REQUIRES(mutex);
/// NOTE(review): not annotated with TSA_REQUIRES, so presumably takes the lock itself — confirm in the .cpp.
bool isUpdateNeeded();
void updateImpl(Poco::Timestamp now, std::vector<Poco::Net::IPAddress> & next_gen) TSA_REQUIRES(mutex);
void updateWeights() TSA_REQUIRES(mutex);
void updateWeightsImpl() TSA_REQUIRES(mutex);
size_t getTotalWeight() const TSA_REQUIRES(mutex);
/// Host name this resolver is responsible for.
const String host;
/// The `history_` period referred to in the description at the top of this file.
const Poco::Timespan history;
const HostResolverMetrics metrics = getMetrics();
// Resolve function; it is a member so that tests can substitute their own resolution.
const ResolveFunction resolve_function;
/// Protects `last_resolve_time` and `records`.
std::mutex mutex;
Poco::Timestamp last_resolve_time TSA_GUARDED_BY(mutex);
Records records TSA_GUARDED_BY(mutex);
Poco::Logger * log = &Poco::Logger::get("ConnectionPool");
};
/// Registry of resolvers keyed by host name.
/// The constructor is private and copying is deleted; the object is reached through `instance()`.
class HostResolversPool
{
private:
HostResolversPool() = default;
HostResolversPool(const HostResolversPool &) = delete;
HostResolversPool & operator=(const HostResolversPool &) = delete;
public:
static HostResolversPool & instance();
/// NOTE(review): presumably clears `host_pools` — confirm in the .cpp.
void dropCache();
/// Returns the resolver for `host`.
/// NOTE(review): presumably creates and caches one on first use — confirm in the .cpp.
HostResolver::Ptr getResolver(const String & host);
private:
/// Protects `host_pools`.
std::mutex mutex;
std::unordered_map<String, HostResolver::Ptr> host_pools TSA_GUARDED_BY(mutex);
};
}

View File

@ -696,6 +696,35 @@ The server successfully detected this situation and will download merged part fr
\
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas") \
M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas") \
\
M(StorageConnectionsCreated, "Number of created connections for storages") \
M(StorageConnectionsReused, "Number of reused connections for storages") \
M(StorageConnectionsReset, "Number of reset connections for storages") \
M(StorageConnectionsPreserved, "Number of preserved connections for storages") \
M(StorageConnectionsExpired, "Number of expired connections for storages") \
M(StorageConnectionsErrors, "Number of cases when creation of a connection for storage is failed") \
M(StorageConnectionsElapsedMicroseconds, "Total time spend on creating connections for storages") \
\
M(DiskConnectionsCreated, "Number of created connections for disk") \
M(DiskConnectionsReused, "Number of reused connections for disk") \
M(DiskConnectionsReset, "Number of reset connections for disk") \
M(DiskConnectionsPreserved, "Number of preserved connections for disk") \
M(DiskConnectionsExpired, "Number of expired connections for disk") \
M(DiskConnectionsErrors, "Number of cases when creation of a connection for disk is failed") \
M(DiskConnectionsElapsedMicroseconds, "Total time spend on creating connections for disk") \
\
M(HTTPConnectionsCreated, "Number of created http connections") \
M(HTTPConnectionsReused, "Number of reused http connections") \
M(HTTPConnectionsReset, "Number of reset http connections") \
M(HTTPConnectionsPreserved, "Number of preserved http connections") \
M(HTTPConnectionsExpired, "Number of expired http connections") \
M(HTTPConnectionsErrors, "Number of cases when creation of a http connection failed") \
M(HTTPConnectionsElapsedMicroseconds, "Total time spend on creating http connections") \
\
M(AddressesDiscovered, "Total count of new addresses in dns resolve results for http connections") \
M(AddressesExpired, "Total count of expired addresses which is no longer presented in dns resolve results for http connections") \
M(AddressesMarkedAsFailed, "Total count of addresses which has been marked as faulty due to connection errors for http connections") \
#ifdef APPLY_FOR_EXTERNAL_EVENTS
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)

View File

@ -44,11 +44,13 @@ struct ProxyConfiguration
}
}
std::string host;
Protocol protocol;
uint16_t port;
bool tunneling;
Protocol original_request_protocol;
std::string host = std::string{};
Protocol protocol = Protocol::HTTP;
uint16_t port = 0;
bool tunneling = false;
Protocol original_request_protocol = Protocol::HTTP;
bool isEmpty() const { return host.size() == 0; }
};
}

View File

@ -26,8 +26,6 @@ ProxyConfiguration ProxyListConfigurationResolver::resolve()
auto & proxy = proxies[index];
LOG_DEBUG(getLogger("ProxyListConfigurationResolver"), "Use proxy: {}", proxies[index].toString());
return ProxyConfiguration {
proxy.getHost(),
ProxyConfiguration::protocolFromString(proxy.getScheme()),

View File

@ -69,7 +69,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
{
auto resolved_endpoint = endpoint;
resolved_endpoint.setHost(resolved_hosts[i].toString());
session = makeHTTPSession(resolved_endpoint, timeouts);
session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts);
try
{

View File

@ -0,0 +1,585 @@
#include <IO/ReadWriteBufferFromHTTP.h>
#include <Common/HTTPConnectionPool.h>
#include <Poco/URI.h>
#include <Poco/Net/ServerSocket.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/Net/HTTPServer.h>
#include <Poco/Net/HTTPServerParams.h>
#include <Poco/Net/HTTPRequestHandler.h>
#include <Poco/Net/HTTPRequestHandlerFactory.h>
#include <thread>
#include <gtest/gtest.h>
namespace
{
/// Copies at most `count` bytes from `in` to `out` through a 4096-byte stack buffer and
/// returns how many bytes were actually copied. With the default `count` the copy goes on
/// until `in` stops delivering data.
size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits<size_t>::max())
{
    constexpr size_t chunk_capacity = 4096;
    char chunk[chunk_capacity];

    size_t copied = 0;

    /// Full-size reads for as long as more than one chunk is still requested.
    while (chunk_capacity < count)
    {
        in.read(chunk, chunk_capacity);
        const size_t got = in.gcount();
        out.write(chunk, got);

        copied += got;
        count -= got;

        /// Nothing arrived: the source has no more data, stop early.
        if (!got)
            return copied;
    }

    /// The remainder fits into a single chunk.
    in.read(chunk, count);
    const size_t tail = in.gcount();
    out.write(chunk, tail);

    return copied + tail;
}
/// Echo handler for the test server: answers 200 OK and sends the request body back,
/// after sleeping for the number of seconds currently stored in `slowdown`.
class MockRequestHandler : public Poco::Net::HTTPRequestHandler
{
public:
    explicit MockRequestHandler(std::shared_ptr<std::atomic<size_t>> slowdown_)
        : slowdown(std::move(slowdown_))
    {
    }

    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override
    {
        response.setStatus(Poco::Net::HTTPResponse::HTTP_OK);

        const auto content_length = request.getContentLength();
        if (content_length <= 0)
            response.setChunkedTransferEncoding(true); // or chunk encoding
        else
            response.setContentLength(content_length); // ContentLength is required for keep alive

        sleepForSeconds(*slowdown);

        stream_copy_n(request.stream(), response.send(), content_length);
    }

    /// Shared with the test fixture, which changes the delay at run time.
    std::shared_ptr<std::atomic<size_t>> slowdown;
};
/// Factory handed to the test HTTP server: every request gets a new MockRequestHandler
/// that shares the same `slowdown` value.
class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
{
public:
explicit HTTPRequestHandlerFactory(std::shared_ptr<std::atomic<size_t>> slowdown_)
: slowdown(std::move(slowdown_))
{
}
/// Returns a raw pointer to a handler allocated with `new`.
/// NOTE(review): ownership is presumably taken by the Poco server — confirm against the Poco documentation.
Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest &) override
{
return new MockRequestHandler(slowdown);
}
std::shared_ptr<std::atomic<size_t>> slowdown;
};
}
using HTTPSession = Poco::Net::HTTPClientSession;
using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
/// Fixture for the HTTP connection pool tests.
/// It starts a local echo HTTP server in the constructor and, before each test, restores default
/// timeouts and limits, removes the server delay, drops cached pools and resets the thread's profile events.
class ConnectionPoolTest : public testing::Test {
protected:
ConnectionPoolTest()
{
startServer();
}
void SetUp() override {
timeouts = DB::ConnectionTimeouts();
DB::HTTPConnectionPools::Limits def_limits{};
DB::HTTPConnectionPools::instance().setLimits(def_limits, def_limits, def_limits);
setSlowDown(0);
DB::HTTPConnectionPools::instance().dropCache();
DB::CurrentThread::getProfileEvents().reset();
// Code here will be called immediately after the constructor (right
// before each test).
}
void TearDown() override {
// Code here will be called immediately after each test (right
// before the destructor).
}
/// Returns the pool of the HTTP connection group for the test server, without a proxy.
DB::IHTTPConnectionPoolForEndpoint::Ptr getPool()
{
auto uri = Poco::URI(getServerUrl());
return DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{});
}
/// URL of the test server, built from the address the server socket is bound to.
std::string getServerUrl() const
{
return "http://" + server_data.socket->address().toString();
}
/// (Re)creates the server: releases any previous instance, then builds params, socket, handler factory and server.
void startServer()
{
server_data.reset();
server_data.params = new Poco::Net::HTTPServerParams();
server_data.socket = std::make_unique<Poco::Net::ServerSocket>(server_data.port);
server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive);
server_data.server = std::make_unique<Poco::Net::HTTPServer>(
server_data.handler_factory, *server_data.socket, server_data.params);
server_data.server->start();
}
Poco::Net::HTTPServer & getServer() const
{
return *server_data.server;
}
/// Sets the delay, in seconds, that the request handler sleeps before echoing the body.
void setSlowDown(size_t seconds)
{
*slowdown_receive = seconds;
}
DB::ConnectionTimeouts timeouts;
/// Shared with every request handler created by the factory.
std::shared_ptr<std::atomic<size_t>> slowdown_receive = std::make_shared<std::atomic<size_t>>(0);
/// Everything that makes up the running test server.
struct ServerData
{
// just some port to avoid collisions with others tests
UInt16 port = 9871;
Poco::Net::HTTPServerParams::Ptr params;
std::unique_ptr<Poco::Net::ServerSocket> socket;
HTTPRequestHandlerFactory::Ptr handler_factory;
std::unique_ptr<Poco::Net::HTTPServer> server;
ServerData() = default;
ServerData(ServerData &&) = default;
ServerData & operator =(ServerData &&) = delete;
/// Stops the server if it exists, then releases the members in the reverse order of their creation in startServer().
void reset()
{
if (server)
server->stop();
server = nullptr;
handler_factory = nullptr;
socket = nullptr;
params = nullptr;
}
~ServerData() {
reset();
}
};
ServerData server_data;
};
/// Blocks until `pred()` returns true, re-checking it every 250 milliseconds. There is no timeout.
void wait_until(std::function<bool()> pred)
{
    for (;;)
    {
        if (pred())
            return;
        sleepForMilliseconds(250);
    }
}
/// Sends `data` as the body of a PUT request over `session` and asserts that the server
/// answers 200 OK with exactly the same body.
void echoRequest(String data, HTTPSession & session)
{
    /// Request part.
    {
        Poco::Net::HTTPRequest put_request(Poco::Net::HTTPRequest::HTTP_PUT, "/", "HTTP/1.1"); // HTTP/1.1 is required for keep alive
        put_request.setContentLength(data.size());

        std::ostream & body_out = session.sendRequest(put_request);
        body_out << data;
    }

    /// Response part.
    {
        std::stringstream echoed;
        Poco::Net::HTTPResponse http_response;

        std::istream & body_in = session.receiveResponse(http_response);
        ASSERT_EQ(http_response.getStatus(), Poco::Net::HTTPResponse::HTTP_OK);

        stream_copy_n(body_in, echoed);
        ASSERT_EQ(data, echoed.str());
    }
}
/// A connection taken from the pool is connected and visible on the server; `reset()` closes it.
TEST_F(ConnectionPoolTest, CanConnect)
{
auto pool = getPool();
auto connection = pool->getConnection(timeouts);
ASSERT_TRUE(connection->connected());
/// One connection has been created; it is in use and nothing is stored in the pool.
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
/// The server notices the connection asynchronously, hence the wait.
wait_until([&] () { return getServer().currentConnections() == 1; });
ASSERT_EQ(1, getServer().currentConnections());
ASSERT_EQ(1, getServer().totalConnections());
connection->reset();
/// After reset the server sees no open connection, and no new one has been created.
wait_until([&] () { return getServer().currentConnections() == 0; });
ASSERT_EQ(0, getServer().currentConnections());
ASSERT_EQ(1, getServer().totalConnections());
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
}
/// A request/response round trip works over a pooled connection and uses exactly one server connection.
TEST_F(ConnectionPoolTest, CanRequest)
{
auto pool = getPool();
auto connection = pool->getConnection(timeouts);
echoRequest("Hello", *connection);
ASSERT_EQ(1, getServer().totalConnections());
ASSERT_EQ(1, getServer().currentConnections());
connection->reset();
/// After reset the server sees the connection closed; still only one connection was ever created.
wait_until([&] () { return getServer().currentConnections() == 0; });
ASSERT_EQ(0, getServer().currentConnections());
ASSERT_EQ(1, getServer().totalConnections());
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
}
/// A connection that is released without being reset is kept (preserved) by the pool.
TEST_F(ConnectionPoolTest, CanPreserve)
{
auto pool = getPool();
{
/// The connection is released when it goes out of scope.
auto connection = pool->getConnection(timeouts);
}
/// It is counted as both active and stored, and it stays open on the server side.
ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count));
wait_until([&] () { return getServer().currentConnections() == 1; });
ASSERT_EQ(1, getServer().currentConnections());
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
}
/// A preserved connection is handed out again instead of creating a new one.
TEST_F(ConnectionPoolTest, CanReuse)
{
auto pool = getPool();
{
/// First use: the connection is stored in the pool when it goes out of scope.
auto connection = pool->getConnection(timeouts);
// DB::setReuseTag(*connection);
}
ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count));
{
/// Second use: the stored connection is taken out of the pool (stored_count drops to 0).
auto connection = pool->getConnection(timeouts);
ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
wait_until([&] () { return getServer().currentConnections() == 1; });
ASSERT_EQ(1, getServer().currentConnections());
echoRequest("Hello", *connection);
ASSERT_EQ(1, getServer().totalConnections());
ASSERT_EQ(1, getServer().currentConnections());
/// Reset, so that the connection is not stored again on release.
connection->reset();
}
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
wait_until([&] () { return getServer().currentConnections() == 0; });
ASSERT_EQ(0, getServer().currentConnections());
ASSERT_EQ(1, getServer().totalConnections());
/// One creation, one preservation and one reuse in total.
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
}
/// Ten sequential requests plus one final checkout are all served by a single server connection.
TEST_F(ConnectionPoolTest, CanReuse10)
{
auto pool = getPool();
for (int i = 0; i < 10; ++i)
{
/// Each iteration releases the connection at the end of the loop body.
auto connection = pool->getConnection(timeouts);
echoRequest("Hello", *connection);
}
{
auto connection = pool->getConnection(timeouts);
connection->reset(); // reset just not to wait its expiration here
}
wait_until([&] () { return getServer().currentConnections() == 0; });
ASSERT_EQ(0, getServer().currentConnections());
ASSERT_EQ(1, getServer().totalConnections());
/// 1 creation; 10 preservations (one per loop iteration); 10 reuses (9 in the loop plus the final checkout).
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
}
/// Five connections held at the same time are all preserved; later sequential requests create no new ones.
TEST_F(ConnectionPoolTest, CanReuse5)
{
/// NOTE(review): the unit of the keep-alive value is presumably seconds — confirm in ConnectionTimeouts.
timeouts.withHTTPKeepAliveTimeout(1);
auto pool = getPool();
std::vector<DB::HTTPSessionPtr> connections;
connections.reserve(5);
for (int i = 0; i < 5; ++i)
{
connections.push_back(pool->getConnection(timeouts));
}
/// Releasing all five at once stores all of them.
connections.clear();
ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count));
wait_until([&] () { return getServer().currentConnections() == 5; });
ASSERT_EQ(5, getServer().currentConnections());
ASSERT_EQ(5, getServer().totalConnections());
for (int i = 0; i < 5; ++i)
{
/// Every checkout is counted as a reuse and every release as a preservation.
auto connection = pool->getConnection(timeouts);
echoRequest("Hello", *connection);
}
/// No new server connections; the counters grew by 5 reuses and 5 preservations.
ASSERT_EQ(5, getServer().totalConnections());
ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count));
}
/// After `abort()` a session reconnects on its next use. With no stored connection available
/// in the pool, the reconnect creates a brand-new connection.
TEST_F(ConnectionPoolTest, CanReconnectAndCreate)
{
    auto pool = getPool();

    std::vector<HTTPSessionPtr> in_use;

    const size_t count = 2;
    /// The index has the same unsigned type as `count`, so the condition does not mix signed and unsigned operands.
    for (size_t i = 0; i < count; ++i)
    {
        auto connection = pool->getConnection(timeouts);
        // DB::setReuseTag(*connection);
        in_use.push_back(connection);
    }

    /// All connections are in use; nothing is stored in the pool.
    ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);

    ASSERT_EQ(count, CurrentMetrics::get(pool->getMetrics().active_count));
    ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));

    auto connection = std::move(in_use.back());
    in_use.pop_back();

    echoRequest("Hello", *connection);

    connection->abort(); // further usage requires reconnect, new connection

    echoRequest("Hello", *connection);

    connection->reset();

    /// Only the connection that is still held in `in_use` remains open on the server.
    wait_until([&] () { return getServer().currentConnections() == 1; });
    ASSERT_EQ(1, getServer().currentConnections());
    ASSERT_EQ(count+1, getServer().totalConnections());

    /// The reconnect was counted as a creation, not as a reuse.
    ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
    ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
}
/// After `abort()` a session reconnects on its next use. With a stored connection available
/// in the pool, the reconnect reuses it instead of creating a new one.
TEST_F(ConnectionPoolTest, CanReconnectAndReuse)
{
    auto pool = getPool();

    std::vector<HTTPSessionPtr> in_use;

    const size_t count = 2;
    /// The index has the same unsigned type as `count`, so the condition does not mix signed and unsigned operands.
    for (size_t i = 0; i < count; ++i)
    {
        auto connection = pool->getConnection(timeouts);
        // DB::setReuseTag(*connection);
        in_use.push_back(std::move(connection));
    }

    auto connection = std::move(in_use.back());
    in_use.pop_back();
    in_use.clear(); // other connection will be reused

    echoRequest("Hello", *connection);

    connection->abort(); // further usage requires reconnect, reuse connection from pool

    echoRequest("Hello", *connection);

    connection->reset();

    wait_until([&] () { return getServer().currentConnections() == 0; });
    ASSERT_EQ(0, getServer().currentConnections());
    ASSERT_EQ(2, getServer().totalConnections());

    /// No extra creation: the released connection was preserved once and reused once.
    ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
    ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
    ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
}
/// A receive timeout makes the request throw and the connection is reset instead of preserved.
/// The timeouts passed to getConnection() also apply to a reused session.
TEST_F(ConnectionPoolTest, ReceiveTimeout)
{
/// The server handler sleeps 2 seconds before answering.
/// NOTE(review): the receive timeout values below are presumably in seconds as well — confirm in ConnectionTimeouts.
setSlowDown(2);
timeouts.withReceiveTimeout(1);
auto pool = getPool();
{
/// Timeout shorter than the server delay: the request fails.
auto connection = pool->getConnection(timeouts);
ASSERT_ANY_THROW(
echoRequest("Hello", *connection);
);
}
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);
{
/// Timeout longer than the server delay: a new connection is created and the request succeeds.
timeouts.withReceiveTimeout(3);
auto connection = pool->getConnection(timeouts);
ASSERT_NO_THROW(
echoRequest("Hello", *connection);
);
}
ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);
{
/// timeouts have effect for reused session
timeouts.withReceiveTimeout(1);
auto connection = pool->getConnection(timeouts);
ASSERT_ANY_THROW(
echoRequest("Hello", *connection);
);
}
/// The preserved connection was reused (no new creation) and then reset because of the failure.
ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]);
ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);
}
/// A buffer built by BuilderRWBufferFromHTTP takes its connection from the pool and,
/// once the response has been read to the end and the buffer destroyed, the connection is preserved.
TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP)
{
std::string_view message = "Hello ReadWriteBufferFromHTTP";
auto uri = Poco::URI(getServerUrl());
auto metrics = DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{})->getMetrics();
Poco::Net::HTTPBasicCredentials empty_creds;
/// The out callback writes the request body, which the test server echoes back.
auto buf_from_http = DB::BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(DB::HTTPConnectionGroupType::HTTP)
.withOutCallback(
[&] (std::ostream & in)
{
in << message;
})
.withDelayInit(false)
.create(empty_creds);
/// With delayed initialization turned off the connection is already in use here.
ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count));
/// Zero-filled buffer, so that the string_view built from it below ends right after the message.
char buf[256];
std::fill(buf, buf + sizeof(buf), 0);
buf_from_http->readStrict(buf, message.size());
ASSERT_EQ(std::string_view(buf), message);
ASSERT_TRUE(buf_from_http->eof());
/// Destroy the buffer; its connection goes back to the pool.
buf_from_http.reset();
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]);
ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count));
}
/// With all limits set to zero a released connection is not stored: it is reset instead.
TEST_F(ConnectionPoolTest, HardLimit)
{
DB::HTTPConnectionPools::Limits zero_limits {0, 0, 0};
DB::HTTPConnectionPools::instance().setLimits(zero_limits, zero_limits, zero_limits);
auto pool = getPool();
{
auto connection = pool->getConnection(timeouts);
}
/// Nothing is active or stored; the single created connection was reset, not preserved.
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);
}
/// A connection whose request was sent but whose response was never received is not preserved.
TEST_F(ConnectionPoolTest, NoReceiveCall)
{
auto pool = getPool();
{
auto connection = pool->getConnection(timeouts);
{
auto data = String("Hello");
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_PUT, "/", "HTTP/1.1"); // HTTP/1.1 is required for keep alive
request.setContentLength(data.size());
std::ostream & ostream = connection->sendRequest(request);
ostream << data;
}
/// Flush the request; receiveResponse() is deliberately not called.
connection->flushRequest();
}
/// The connection was reset on release instead of being stored.
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count));
ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count));
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]);
ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]);
}

View File

@ -0,0 +1,278 @@
#include <IO/WriteBufferFromFile.h>
#include <Common/HostResolvePool.h>
#include <base/sleep.h>
#include <thread>
#include <gtest/gtest.h>
/// Test double for DB::HostResolver: its public constructor forwards to the base-class constructor
/// that accepts a custom resolve function.
class ResolvePoolMock : public DB::HostResolver
{
public:
using ResolveFunction = DB::HostResolver::ResolveFunction;
/// Note the argument order: (host, history, function) here, (function, host, history) in the base class.
ResolvePoolMock(String host_, Poco::Timespan history_, ResolveFunction && func)
: DB::HostResolver(std::move(func), std::move(host_), history_)
{
}
};
/// Fixture for the HostResolver tests. The resolver under test gets its addresses from the
/// `addresses` member, so a test can change the "DNS answer" at any moment.
class ResolvePoolTest : public testing::Test
{
protected:
    ResolvePoolTest()
    {
        DB::HostResolversPool::instance().dropCache();
    }

    /// Runs before each test: resets the thread's profile events, checks that no address is
    /// counted as active and restores the default set of three addresses.
    void SetUp() override
    {
        DB::CurrentThread::getProfileEvents().reset();

        ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count));

        addresses = std::set<String>{"127.0.0.1", "127.0.0.2", "127.0.0.3"};
    }

    /// Runs after each test; nothing to clean up.
    void TearDown() override
    {
    }

    /// Creates a resolver for "some_host" whose resolve function returns the current content
    /// of `addresses`. `history_ms` is converted to the microseconds Poco::Timespan is built from.
    DB::HostResolver::Ptr make_resolver(size_t history_ms = 200)
    {
        auto fake_dns = [&] (const String &)
        {
            std::vector<Poco::Net::IPAddress> resolved;
            resolved.reserve(addresses.size());
            for (const auto & text : addresses)
                resolved.push_back(Poco::Net::IPAddress(text));
            return resolved;
        };

        return std::make_shared<ResolvePoolMock>("some_host", Poco::Timespan(history_ms * 1000), std::move(fake_dns));
    }

    DB::HostResolverMetrics metrics = DB::HostResolver::getMetrics();
    std::set<String> addresses;
};
/// A single resolve() returns one of the known addresses and registers all of them as discovered and active.
TEST_F(ResolvePoolTest, CanResolve)
{
auto resolver = make_resolver();
auto address = resolver->resolve();
ASSERT_TRUE(addresses.contains(*address));
ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);
ASSERT_EQ(addresses.size(), CurrentMetrics::get(metrics.active_count));
}
/// Repeated resolve() calls eventually hand out every known address.
/// The loop has no iteration limit: it ends only once all addresses have been seen.
TEST_F(ResolvePoolTest, CanResolveAll)
{
    auto resolver = make_resolver();

    std::set<String> seen;
    while (seen.size() != addresses.size())
    {
        auto entry = resolver->resolve();
        seen.insert(*entry);
    }

    ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);
}
/// Returns the sum of all mapped values of `container` (0 for an empty map).
/// The map is taken by const reference, so it is not copied on every call.
size_t getSum(const std::map<String, size_t> & container)
{
    size_t sum = 0;
    for (const auto & [_, val] : container)
    {
        sum += val;
    }
    return sum;
}
size_t getMin(std::map<String, size_t> container)
{
if (container.empty())
return 0;
size_t min_val = container.begin()->second;
for (auto & [_, val] : container)
{
min_val = std::min(min_val, val);
}
return min_val;
}
double getMean(std::map<String, size_t> container)
{
return 1.0 * getSum(container) / container.size();
}
/// Returns the largest absolute difference between a mapped value of `container` and `ref_val`
/// (0.0 for an empty map).
/// The map is taken by const reference, so it is not copied on every call.
double getMaxDiff(const std::map<String, size_t> & container, double ref_val)
{
    double diff = 0.0;
    for (const auto & [_, val] : container)
    {
        diff = std::max(std::fabs(val - ref_val), diff);
    }
    return diff;
}
/// Over 50000 resolves the addresses are used roughly evenly: no per-address count deviates
/// from the mean by 30% of the mean or more.
TEST_F(ResolvePoolTest, CanResolveEvenly)
{
    auto resolver = make_resolver();

    std::map<String, size_t> hits;
    for (size_t attempt = 0; attempt < 50000; ++attempt)
    {
        auto entry = resolver->resolve();
        /// operator[] value-initializes a missing counter to 0, so the first hit yields 1.
        ++hits[*entry];
    }

    auto mean = getMean(hits);
    auto diff = getMaxDiff(hits, mean);

    ASSERT_GT(0.3 * mean, diff);
}
/// When the resolve result changes to a different set, the resolver keeps the old addresses and adds the new ones.
TEST_F(ResolvePoolTest, CanMerge)
{
/// History of 100000 ms, so the old addresses do not expire during the test.
auto resolver = make_resolver(100000);
auto address = resolver->resolve();
ASSERT_TRUE(addresses.contains(*address));
ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);
/// Replace the "DNS answer" with two new addresses and force an update.
auto old_addresses = addresses;
addresses = std::set<String>{"127.0.0.4", "127.0.0.5"};
resolver->update();
ASSERT_EQ(addresses.size() + old_addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);
ASSERT_EQ(addresses.size() + old_addresses.size(), CurrentMetrics::get(metrics.active_count));
/// Every address, old and new, is eventually handed out (the loop has no iteration limit).
std::set<String> results;
while (results.size() != addresses.size() + old_addresses.size())
{
auto next_addr = resolver->resolve();
results.insert(*next_addr);
}
}
/// Addresses that appear later catch up: after the old addresses have each been used more than
/// 10000 times, the next 3000 resolves are enough for every address to exceed 1000 uses.
TEST_F(ResolvePoolTest, CanGainEven)
{
    auto resolver = make_resolver();
    auto address = resolver->resolve();

    std::map<String, size_t> hits;

    /// Warm-up with the initial three addresses.
    for (size_t attempt = 0; attempt < 40000; ++attempt)
    {
        auto entry = resolver->resolve();
        /// operator[] value-initializes a missing counter to 0, so the first hit yields 1.
        ++hits[*entry];
    }

    ASSERT_GT(getMin(hits), 10000);

    addresses.insert("127.0.0.4");
    addresses.insert("127.0.0.5");
    resolver->update();

    /// return mostly new addresses
    for (size_t attempt = 0; attempt < 3000; ++attempt)
    {
        auto entry = resolver->resolve();
        ++hits[*entry];
    }

    ASSERT_EQ(hits.size(), 5);
    ASSERT_GT(getMin(hits), 1000);
}
/// An address marked as failed is not handed out again while the history period lasts.
TEST_F(ResolvePoolTest, CanFail)
{
/// History of 10000 ms, so the failed mark does not expire during the test.
auto resolver = make_resolver(10000);
auto failed_addr = resolver->resolve();
failed_addr.setFail();
/// The failure is counted; the address is still counted as active and no new address was discovered.
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.failed]);
ASSERT_EQ(addresses.size(), CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);
for (size_t i = 0; i < 1000; ++i)
{
auto next_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*next_addr));
ASSERT_NE(*next_addr, *failed_addr);
}
}
/// A failed address is eventually handed out again (default history of 200 ms).
/// The loop has no iteration limit: the test ends only once the failed address is returned.
TEST_F(ResolvePoolTest, CanFailAndHeal)
{
auto resolver = make_resolver();
auto failed_addr = resolver->resolve();
failed_addr.setFail();
while (true)
{
auto next_addr = resolver->resolve();
if (*failed_addr == *next_addr)
break;
}
}
/// An address that disappears from the resolve result is dropped once the history period has passed.
TEST_F(ResolvePoolTest, CanExpire)
{
auto resolver = make_resolver();
auto expired_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*expired_addr));
/// Remove the address from the "DNS answer" and wait longer than the default 200 ms history.
addresses.erase(*expired_addr);
sleepForSeconds(1);
for (size_t i = 0; i < 1000; ++i)
{
auto next_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*next_addr));
ASSERT_NE(*next_addr, *expired_addr);
}
/// `addresses` is one element smaller now, so size() + 1 is the originally discovered count.
ASSERT_EQ(addresses.size() + 1, DB::CurrentThread::getProfileEvents()[metrics.discovered]);
ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]);
}

View File

@ -128,8 +128,17 @@ namespace DB
M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \
M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \
M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \
M(UInt64, disk_connections_soft_limit, 1000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \
M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \
M(UInt64, disk_connections_store_limit, 12000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \
M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \
M(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \
M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \
M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp
DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)

View File

@ -110,7 +110,7 @@ class IColumn;
M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. This only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
@ -700,7 +700,7 @@ class IColumn;
M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \
M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \
M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw'", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \
M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \
\
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \

View File

@ -85,7 +85,8 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.3", {{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},
{"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"},
{"page_cache_inject_eviction", false, false, "Added userspace page cache"},

View File

@ -118,6 +118,7 @@ IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS,
{"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
{"throw_only_active", DistributedDDLOutputMode::THROW_ONLY_ACTIVE},
{"null_status_on_timeout_only_active", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE},
{"none_only_active", DistributedDDLOutputMode::NONE_ONLY_ACTIVE},
{"never_throw", DistributedDDLOutputMode::NEVER_THROW}})
IMPLEMENT_SETTING_ENUM(StreamingHandleErrorMode, ErrorCodes::BAD_ARGUMENTS,

View File

@ -177,6 +177,7 @@ enum class DistributedDDLOutputMode
NEVER_THROW,
THROW_ONLY_ACTIVE,
NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE,
NONE_ONLY_ACTIVE,
};
DECLARE_SETTING_ENUM(DistributedDDLOutputMode)

View File

@ -88,20 +88,18 @@ void HTTPDictionarySource::getUpdateFieldAndDate(Poco::URI & uri)
QueryPipeline HTTPDictionarySource::loadAll()
{
LOG_TRACE(log, "loadAll {}", toString());
Poco::URI uri(configuration.url);
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
Poco::Net::HTTPRequest::HTTP_GET,
ReadWriteBufferFromHTTP::OutStreamCallback(),
timeouts,
credentials,
0,
DBMS_DEFAULT_BUFFER_SIZE,
context->getReadSettings(),
configuration.header_entries,
nullptr, false);
return createWrappedBuffer(std::move(in_ptr));
Poco::URI uri(configuration.url);
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withSettings(context->getReadSettings())
.withTimeouts(timeouts)
.withHeaders(configuration.header_entries)
.withDelayInit(false)
.create(credentials);
return createWrappedBuffer(std::move(buf));
}
QueryPipeline HTTPDictionarySource::loadUpdatedAll()
@ -109,19 +107,16 @@ QueryPipeline HTTPDictionarySource::loadUpdatedAll()
Poco::URI uri(configuration.url);
getUpdateFieldAndDate(uri);
LOG_TRACE(log, "loadUpdatedAll {}", uri.toString());
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
Poco::Net::HTTPRequest::HTTP_GET,
ReadWriteBufferFromHTTP::OutStreamCallback(),
timeouts,
credentials,
0,
DBMS_DEFAULT_BUFFER_SIZE,
context->getReadSettings(),
configuration.header_entries,
nullptr, false);
return createWrappedBuffer(std::move(in_ptr));
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withSettings(context->getReadSettings())
.withTimeouts(timeouts)
.withHeaders(configuration.header_entries)
.withDelayInit(false)
.create(credentials);
return createWrappedBuffer(std::move(buf));
}
QueryPipeline HTTPDictionarySource::loadIds(const std::vector<UInt64> & ids)
@ -139,19 +134,18 @@ QueryPipeline HTTPDictionarySource::loadIds(const std::vector<UInt64> & ids)
};
Poco::URI uri(configuration.url);
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
Poco::Net::HTTPRequest::HTTP_POST,
out_stream_callback,
timeouts,
credentials,
0,
DBMS_DEFAULT_BUFFER_SIZE,
context->getReadSettings(),
configuration.header_entries,
nullptr, false);
return createWrappedBuffer(std::move(in_ptr));
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withSettings(context->getReadSettings())
.withTimeouts(timeouts)
.withHeaders(configuration.header_entries)
.withOutCallback(std::move(out_stream_callback))
.withDelayInit(false)
.create(credentials);
return createWrappedBuffer(std::move(buf));
}
QueryPipeline HTTPDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
@ -169,19 +163,18 @@ QueryPipeline HTTPDictionarySource::loadKeys(const Columns & key_columns, const
};
Poco::URI uri(configuration.url);
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
Poco::Net::HTTPRequest::HTTP_POST,
out_stream_callback,
timeouts,
credentials,
0,
DBMS_DEFAULT_BUFFER_SIZE,
context->getReadSettings(),
configuration.header_entries,
nullptr, false);
return createWrappedBuffer(std::move(in_ptr));
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withSettings(context->getReadSettings())
.withTimeouts(timeouts)
.withHeaders(configuration.header_entries)
.withOutCallback(std::move(out_stream_callback))
.withDelayInit(false)
.create(credentials);
return createWrappedBuffer(std::move(buf));
}
bool HTTPDictionarySource::isModified() const

View File

@ -203,7 +203,7 @@ std::string XDBCDictionarySource::doInvalidateQuery(const std::string & request)
}
QueryPipeline XDBCDictionarySource::loadFromQuery(const Poco::URI & url, const Block & required_sample_block, const std::string & query) const
QueryPipeline XDBCDictionarySource::loadFromQuery(const Poco::URI & uri, const Block & required_sample_block, const std::string & query) const
{
bridge_helper->startBridgeSync();
@ -214,10 +214,15 @@ QueryPipeline XDBCDictionarySource::loadFromQuery(const Poco::URI & url, const B
os << "query=" << escapeForFileName(query);
};
auto read_buf = std::make_unique<ReadWriteBufferFromHTTP>(
url, Poco::Net::HTTPRequest::HTTP_POST, write_body_callback, timeouts, credentials);
auto format = getContext()->getInputFormat(IXDBCBridgeHelper::DEFAULT_FORMAT, *read_buf, required_sample_block, max_block_size);
format->addBuffer(std::move(read_buf));
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(timeouts)
.withOutCallback(std::move(write_body_callback))
.create(credentials);
auto format = getContext()->getInputFormat(IXDBCBridgeHelper::DEFAULT_FORMAT, *buf, required_sample_block, max_block_size);
format->addBuffer(std::move(buf));
return QueryPipeline(std::move(format));
}

View File

@ -74,7 +74,7 @@ private:
// execute invalidate_query. expects single cell in result
std::string doInvalidateQuery(const std::string & request) const;
QueryPipeline loadFromQuery(const Poco::URI & url, const Block & required_sample_block, const std::string & query) const;
QueryPipeline loadFromQuery(const Poco::URI & uri, const Block & required_sample_block, const std::string & query) const;
LoggerPtr log;

View File

@ -1,8 +1,6 @@
#include "ReadBufferFromWebServer.h"
#include <Common/logger_useful.h>
#include <base/sleep.h>
#include <Core/Types.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
@ -45,12 +43,6 @@ std::unique_ptr<ReadBuffer> ReadBufferFromWebServer::initialize()
{
if (read_until_position < offset)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1);
LOG_DEBUG(log, "Reading with range: {}-{}", offset, read_until_position);
}
else
{
LOG_DEBUG(log, "Reading from offset: {}", offset);
}
const auto & settings = context->getSettingsRef();
@ -60,19 +52,14 @@ std::unique_ptr<ReadBuffer> ReadBufferFromWebServer::initialize()
connection_timeouts.withConnectionTimeout(std::max<Poco::Timespan>(settings.http_connection_timeout, Poco::Timespan(20, 0)));
connection_timeouts.withReceiveTimeout(std::max<Poco::Timespan>(settings.http_receive_timeout, Poco::Timespan(20, 0)));
auto res = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
Poco::Net::HTTPRequest::HTTP_GET,
ReadWriteBufferFromHTTP::OutStreamCallback(),
connection_timeouts,
credentials,
0,
buf_size,
read_settings,
HTTPHeaderEntries{},
&context->getRemoteHostFilter(),
/* delay_initialization */true,
use_external_buffer);
auto res = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::DISK)
.withSettings(read_settings)
.withTimeouts(connection_timeouts)
.withBufSize(buf_size)
.withHostFilter(&context->getRemoteHostFilter())
.withExternalBuf(use_external_buffer)
.create(credentials);
if (read_until_position)
res->setReadUntilPosition(read_until_position);
@ -101,44 +88,43 @@ bool ReadBufferFromWebServer::nextImpl()
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1);
}
if (impl)
{
if (!use_external_buffer)
{
/**
* impl was initialized before, pass position() to it to make
* sure there is no pending data which was not read, because
* this branch means we read sequentially.
*/
impl->position() = position();
assert(!impl->hasPendingData());
}
}
else
if (!impl)
{
impl = initialize();
if (!use_external_buffer)
{
BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());
}
}
if (use_external_buffer)
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not, because this branch
* means we are prefetching data, each nextImpl() call we can fill
* a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
}
else
{
impl->position() = position();
}
chassert(available() == 0);
chassert(pos >= working_buffer.begin());
chassert(pos <= working_buffer.end());
chassert(working_buffer.begin() != nullptr);
chassert(impl->buffer().begin() != nullptr);
chassert(impl->available() == 0);
auto result = impl->next();
BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());
chassert(working_buffer.begin() == impl->buffer().begin());
if (result)
{
BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());
offset += working_buffer.size();
}
return result;
}

View File

@ -67,10 +67,6 @@ std::unique_ptr<S3::Client> getClient(
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS);
client_configuration.endpointOverride = uri.endpoint;
client_configuration.http_keep_alive_timeout_ms = config.getUInt(
config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000);
client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000);
client_configuration.wait_on_pool_size_limit = false;
client_configuration.s3_use_adaptive_timeouts = config.getBool(
config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts);

View File

@ -44,34 +44,34 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lock<std::sha
{
Poco::Net::HTTPBasicCredentials credentials{};
ReadWriteBufferFromHTTP metadata_buf(
Poco::URI(fs::path(full_url) / ".index"),
Poco::Net::HTTPRequest::HTTP_GET,
ReadWriteBufferFromHTTP::OutStreamCallback(),
ConnectionTimeouts::getHTTPTimeouts(
getContext()->getSettingsRef(),
getContext()->getServerSettings().keep_alive_timeout),
credentials,
/* max_redirects= */ 0,
/* buffer_size_= */ DBMS_DEFAULT_BUFFER_SIZE,
getContext()->getReadSettings());
auto timeouts = ConnectionTimeouts::getHTTPTimeouts(
getContext()->getSettingsRef(),
getContext()->getServerSettings().keep_alive_timeout);
auto metadata_buf = BuilderRWBufferFromHTTP(Poco::URI(fs::path(full_url) / ".index"))
.withConnectionGroup(HTTPConnectionGroupType::DISK)
.withSettings(getContext()->getReadSettings())
.withTimeouts(timeouts)
.withHostFilter(&getContext()->getRemoteHostFilter())
.withSkipNotFound(true)
.create(credentials);
String file_name;
while (!metadata_buf.eof())
while (!metadata_buf->eof())
{
readText(file_name, metadata_buf);
assertChar('\t', metadata_buf);
readText(file_name, *metadata_buf);
assertChar('\t', *metadata_buf);
bool is_directory;
readBoolText(is_directory, metadata_buf);
readBoolText(is_directory, *metadata_buf);
size_t size = 0;
if (!is_directory)
{
assertChar('\t', metadata_buf);
readIntText(size, metadata_buf);
assertChar('\t', *metadata_buf);
readIntText(size, *metadata_buf);
}
assertChar('\n', metadata_buf);
assertChar('\n', *metadata_buf);
FileDataPtr file_data = is_directory
? FileData::createDirectoryInfo(false)
@ -99,10 +99,6 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lock<std::sha
}
catch (HTTPException & e)
{
/// 404 - no files
if (e.getHTTPStatus() == Poco::Net::HTTPResponse::HTTP_NOT_FOUND)
return {};
e.addMessage("while loading disk metadata");
throw;
}

View File

@ -122,7 +122,7 @@ public:
for (size_t i = 0; i < input_rows_count; ++i)
{
std::string_view sqid = col_non_const->getDataAt(i).toView();
std::vector<UInt64> integers = sqids.decode(sqid);
std::vector<UInt64> integers = sqids.decode(String(sqid));
res_nested_data.insert(integers.begin(), integers.end());
res_offsets_data.push_back(integers.size());
}

View File

@ -141,4 +141,19 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method
.withReceiveTimeout(saturate(recv, receive_timeout));
}
/// Copies every timeout stored in `timeouts` onto `session`:
/// the connection/send/receive triple first, then the keep-alive timeout.
void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts)
{
    const auto & connect_timeout = timeouts.connection_timeout;
    const auto & send_timeout = timeouts.send_timeout;
    const auto & receive_timeout = timeouts.receive_timeout;
    session.setTimeout(connect_timeout, send_timeout, receive_timeout);

    const auto & keep_alive_timeout = timeouts.http_keep_alive_timeout;
    session.setKeepAliveTimeout(keep_alive_timeout);
}
/// Builds a ConnectionTimeouts value from the timeouts currently configured on `session`
/// (connection, send, receive and HTTP keep-alive). All other fields keep their defaults.
ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session)
{
    ConnectionTimeouts session_timeouts;
    return session_timeouts
        .withConnectionTimeout(session.getConnectionTimeout())
        .withSendTimeout(session.getSendTimeout())
        .withReceiveTimeout(session.getReceiveTimeout())
        .withHTTPKeepAliveTimeout(session.getKeepAliveTimeout());
}
}

View File

@ -4,6 +4,7 @@
#include <Core/ServerSettings.h>
#include <Interpreters/Context_fwd.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Timespan.h>
namespace DB
@ -111,4 +112,7 @@ inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Time
return *this;
}
void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts);
ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session);
}

View File

@ -2,13 +2,7 @@
#include <Server/HTTP/HTTPServerResponse.h>
#include <Poco/Any.h>
#include <Common/Concepts.h>
#include <Common/DNSResolver.h>
#include <Common/Exception.h>
#include <Common/MemoryTrackerSwitcher.h>
#include <Common/PoolBase.h>
#include <Common/ProfileEvents.h>
#include <Common/SipHash.h>
#include "config.h"
@ -25,338 +19,18 @@
#include <Poco/Util/Application.h>
#include <sstream>
#include <tuple>
#include <istream>
#include <unordered_map>
#include <Common/ProxyConfiguration.h>
namespace ProfileEvents
{
extern const Event CreatedHTTPConnections;
}
namespace DB
{
namespace ErrorCodes
{
extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS;
extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME;
extern const int UNSUPPORTED_URI_SCHEME;
extern const int LOGICAL_ERROR;
}
namespace
{
/// Translates a ClickHouse ProxyConfiguration into the ProxyConfig structure expected by
/// Poco::Net::HTTPClientSession. Both protocol fields are converted to their string form.
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const ProxyConfiguration & proxy_configuration)
{
    Poco::Net::HTTPClientSession::ProxyConfig converted;

    /// Where the proxy lives.
    converted.host = proxy_configuration.host;
    converted.port = proxy_configuration.port;

    /// How to talk to it.
    converted.protocol = ProxyConfiguration::protocolToString(proxy_configuration.protocol);
    converted.tunnel = proxy_configuration.tunneling;
    converted.originalRequestProtocol = ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol);

    return converted;
}
/// Adapter over a Poco HTTP(S) client session that overrides `reconnect()`:
/// it first retries the previously resolved address and, if that fails, walks
/// through all addresses that DNSResolver returns for the host.
template <typename Session>
requires std::derived_from<Session, Poco::Net::HTTPClientSession>
class HTTPSessionAdapter : public Session
{
    static_assert(std::has_virtual_destructor_v<Session>, "The base class must have a virtual destructor");

public:
    HTTPSessionAdapter(const std::string & host, UInt16 port)
        : Session(host, port)
        , log{getLogger("HTTPSessionAdapter")}
    {
    }

    ~HTTPSessionAdapter() override = default;

protected:
    /// Establishes the connection. Throws the last connection error when no address works.
    void reconnect() override
    {
        /// Step 1: the address used last time, if there is one.
        const bool has_previous_address = !Session::getResolvedHost().empty();
        if (has_previous_address)
        {
            try
            {
                Session::reconnect();
                return;
            }
            catch (...)
            {
                Session::close();
                LOG_TRACE(
                    log,
                    "Last ip ({}) is unreachable for {}:{}. Will try another resolved address.",
                    Session::getResolvedHost(),
                    Session::getHost(),
                    Session::getPort());
            }
        }

        /// Step 2: every resolved address, in the order the resolver returned them.
        /// NOTE(review): the first candidate is dereferenced unconditionally, so this relies on
        /// resolveHostAll() never returning an empty list -- confirm.
        const auto resolved_addresses = DNSResolver::instance().resolveHostAll(Session::getHost());
        auto candidate = resolved_addresses.begin();
        while (true)
        {
            try
            {
                Session::setResolvedHost(candidate->toString());
                Session::reconnect();

                LOG_TRACE(
                    log,
                    "Created HTTP(S) session with {}:{} ({}:{})",
                    Session::getHost(),
                    Session::getPort(),
                    candidate->toString(),
                    Session::getPort());
                return;
            }
            catch (...)
            {
                Session::close();

                ++candidate;
                if (candidate == resolved_addresses.end())
                {
                    /// Nothing left to try: forget the resolved address and propagate the error.
                    Session::setResolvedHost("");
                    throw;
                }

                LOG_TRACE(
                    log,
                    "Failed to create connection with {}:{}, Will try another resolved address. {}",
                    Session::getResolvedHost(),
                    Session::getPort(),
                    getCurrentExceptionMessage(false));
            }
        }
    }

    LoggerPtr log;
};
/// Returns true for an "https" URI and false for an "http" URI.
/// Throws UNSUPPORTED_URI_SCHEME for every other scheme.
bool isHTTPS(const Poco::URI & uri)
{
    const auto & scheme = uri.getScheme();

    if (scheme == "https")
        return true;

    if (scheme == "http")
        return false;

    throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString());
}
/// Creates a new HTTP or HTTPS client session for `host:port`.
///
/// - `https` selects the TLS session type; it is downgraded to plain HTTP when the request
///   goes through an HTTP proxy with tunneling disabled.
/// - `keep_alive` is forwarded to the session.
/// - `proxy_configuration` is applied only when its host is non-empty.
///
/// Throws FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME if HTTPS is required but the build has no SSL.
/// Increments the CreatedHTTPConnections profile event for every session created.
HTTPSessionPtr makeHTTPSessionImpl(
    const std::string & host,
    UInt16 port,
    bool https,
    bool keep_alive,
    DB::ProxyConfiguration proxy_configuration = {})
{
    const bool use_proxy = !proxy_configuration.host.empty();

    if (use_proxy && https)
    {
        // If it is an HTTPS request, proxy server is HTTP and user opted for tunneling off, we must not create an HTTPS request.
        // The desired flow is: HTTP request to the proxy server, then proxy server will initiate an HTTPS request to the target server.
        // There is a weak link in the security, but that's what the user opted for.
        const bool http_proxy_without_tunneling
            = DB::ProxyConfiguration::Protocol::HTTP == proxy_configuration.protocol && !proxy_configuration.tunneling;

        if (http_proxy_without_tunneling)
            https = false;
    }

    HTTPSessionPtr session;
    if (!https)
    {
        session = std::make_shared<HTTPSessionAdapter<Poco::Net::HTTPClientSession>>(host, port);
    }
    else
    {
#if USE_SSL
        session = std::make_shared<HTTPSessionAdapter<Poco::Net::HTTPSClientSession>>(host, port);
#else
        throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "ClickHouse was built without HTTPS support");
#endif
    }

    ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections);

    /// doesn't work properly without patch
    session->setKeepAlive(keep_alive);

    if (use_proxy)
        session->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration));

    return session;
}
/// Pool of HTTP client sessions that all target the same host/port/scheme through the same
/// proxy configuration. New sessions are created with keep-alive enabled.
class SingleEndpointHTTPSessionPool : public PoolBase<Poco::Net::HTTPClientSession>
{
private:
    /// Parameters used for every session this pool allocates.
    const std::string host;
    const UInt16 port;
    const bool https;
    ProxyConfiguration proxy_config;

    using Base = PoolBase<Poco::Net::HTTPClientSession>;

    /// Called by the base pool when a new object is needed.
    ObjectPtr allocObject() override
    {
        /// Pool is global, we shouldn't attribute this memory to query/user.
        MemoryTrackerSwitcher switcher{&total_memory_tracker};

        return makeHTTPSessionImpl(host, port, https, /* keep_alive = */ true, proxy_config);
    }

public:
    /// `max_pool_size_` bounds the pool. When the bound is reached the pool either waits
    /// (`wait_on_pool_size_limit` is true) or allocates a session that bypasses the pool.
    SingleEndpointHTTPSessionPool(
        const std::string & host_,
        UInt16 port_,
        bool https_,
        ProxyConfiguration proxy_config_,
        size_t max_pool_size_,
        bool wait_on_pool_size_limit)
        : Base(
            static_cast<unsigned>(max_pool_size_),
            getLogger("HTTPSessionPool"),
            wait_on_pool_size_limit ? BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool)
        , host(host_)
        , port(port_)
        , https(https_)
        , proxy_config(proxy_config_)
    {
    }
};
/// Singleton registry of per-endpoint session pools.
///
/// A pool is identified by the target endpoint (host, port, scheme), the proxy configuration
/// and the behaviour on pool-size limit. Pools are created lazily on first use and are never
/// removed from the registry.
class HTTPSessionPool : private boost::noncopyable
{
public:
    /// Identity of a single-endpoint pool.
    struct Key
    {
        String target_host;
        UInt16 target_port;
        bool is_target_https;
        ProxyConfiguration proxy_config;
        bool wait_on_pool_size_limit;

        bool operator ==(const Key & rhs) const
        {
            return std::tie(
                       target_host,
                       target_port,
                       is_target_https,
                       proxy_config.host,
                       proxy_config.port,
                       proxy_config.protocol,
                       proxy_config.tunneling,
                       proxy_config.original_request_protocol,
                       wait_on_pool_size_limit)
                == std::tie(
                       rhs.target_host,
                       rhs.target_port,
                       rhs.is_target_https,
                       rhs.proxy_config.host,
                       rhs.proxy_config.port,
                       rhs.proxy_config.protocol,
                       rhs.proxy_config.tunneling,
                       rhs.proxy_config.original_request_protocol,
                       rhs.wait_on_pool_size_limit);
        }
    };

private:
    using PoolPtr = std::shared_ptr<SingleEndpointHTTPSessionPool>;
    using Entry = SingleEndpointHTTPSessionPool::Entry;

    /// Hashes exactly the fields compared by Key::operator==.
    struct Hasher
    {
        size_t operator()(const Key & k) const
        {
            SipHash s;
            s.update(k.target_host);
            s.update(k.target_port);
            s.update(k.is_target_https);
            s.update(k.proxy_config.host);
            s.update(k.proxy_config.port);
            s.update(k.proxy_config.protocol);
            s.update(k.proxy_config.tunneling);
            s.update(k.proxy_config.original_request_protocol);
            s.update(k.wait_on_pool_size_limit);
            return s.get64();
        }
    };

    /// Protects `endpoints_pool`.
    std::mutex mutex;
    std::unordered_map<Key, PoolPtr, Hasher> endpoints_pool;

protected:
    HTTPSessionPool() = default;

public:
    static auto & instance()
    {
        static HTTPSessionPool instance;
        return instance;
    }

    /// Returns a session for `uri` taken from the matching per-endpoint pool, creating the
    /// pool on first use with `max_connections_per_endpoint` as its size.
    /// The session is reset unless it carries HTTPSessionReuseTag; the tag is then cleared
    /// and `timeouts` are applied. The connection timeout is passed to the pool's `get()`.
    Entry getSession(
        const Poco::URI & uri,
        const ProxyConfiguration & proxy_config,
        const ConnectionTimeouts & timeouts,
        size_t max_connections_per_endpoint,
        bool wait_on_pool_size_limit)
    {
        const std::string & host = uri.getHost();
        UInt16 port = uri.getPort();
        bool https = isHTTPS(uri);

        HTTPSessionPool::Key key{host, port, https, proxy_config, wait_on_pool_size_limit};

        /// Some routines held session objects until the end of its lifetime. Also this routines may create another sessions in this time frame.
        /// If some other session holds the registry lock because it waits on another lock inside `pool->get` it isn't possible to create any
        /// new session and thus finish routine, return session to the pool and unlock the thread waiting inside `pool->get`.
        /// To avoid such a deadlock the registry lock is released before entering `pool->get`.
        ///
        /// The pool pointer is copied while the lock is held: an iterator into `endpoints_pool`
        /// must not be used after unlocking, because a concurrent emplace() may rehash the map
        /// and invalidate it.
        PoolPtr pool;
        {
            std::lock_guard lock(mutex);

            auto it = endpoints_pool.find(key);
            if (it == endpoints_pool.end())
                it = endpoints_pool.emplace(
                    key,
                    std::make_shared<SingleEndpointHTTPSessionPool>(
                        host,
                        port,
                        https,
                        proxy_config,
                        max_connections_per_endpoint,
                        wait_on_pool_size_limit)).first;

            pool = it->second;
        }

        auto retry_timeout = timeouts.connection_timeout.totalMilliseconds();
        auto session = pool->get(retry_timeout);

        const auto & session_data = session->sessionData();
        if (session_data.empty() || !Poco::AnyCast<HTTPSessionReuseTag>(&session_data))
        {
            /// Reset session if it is not reusable. See comment for HTTPSessionReuseTag.
            session->reset();
        }

        session->attachSessionData({});

        setTimeouts(*session, timeouts);

        return session;
    }
};
}
/// Configures `session` from `timeouts`: first the connection, send and receive timeouts
/// in a single call, then the HTTP keep-alive timeout.
void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts)
{
    const auto & keep_alive = timeouts.http_keep_alive_timeout;

    session.setTimeout(
        timeouts.connection_timeout,
        timeouts.send_timeout,
        timeouts.receive_timeout);
    session.setKeepAliveTimeout(keep_alive);
}
void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout)
@ -370,28 +44,13 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_
}
HTTPSessionPtr makeHTTPSession(
HTTPConnectionGroupType group,
const Poco::URI & uri,
const ConnectionTimeouts & timeouts,
ProxyConfiguration proxy_configuration
)
ProxyConfiguration proxy_configuration)
{
const std::string & host = uri.getHost();
UInt16 port = uri.getPort();
bool https = isHTTPS(uri);
auto session = makeHTTPSessionImpl(host, port, https, false, proxy_configuration);
setTimeouts(*session, timeouts);
return session;
}
PooledHTTPSessionPtr makePooledHTTPSession(
const Poco::URI & uri,
const ConnectionTimeouts & timeouts,
size_t per_endpoint_pool_size,
bool wait_on_pool_size_limit,
ProxyConfiguration proxy_config)
{
return HTTPSessionPool::instance().getSession(uri, proxy_config, timeouts, per_endpoint_pool_size, wait_on_pool_size_limit);
auto connection_pool = HTTPConnectionPools::instance().getPool(group, uri, proxy_configuration);
return connection_pool->getConnection(timeouts);
}
/// Returns true when `status` is one of the redirect statuses handled here:
/// MOVED_PERMANENTLY, FOUND, SEE_OTHER or TEMPORARY_REDIRECT.
bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status)
{
    switch (status)
    {
        case Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY:
        case Poco::Net::HTTPResponse::HTTP_FOUND:
        case Poco::Net::HTTPResponse::HTTP_SEE_OTHER:
        case Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT:
            return true;
        default:
            return false;
    }
}
@ -400,11 +59,11 @@ std::istream * receiveResponse(
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, const bool allow_redirects)
{
auto & istr = session.receiveResponse(response);
assertResponseIsOk(request, response, istr, allow_redirects);
assertResponseIsOk(request.getURI(), response, istr, allow_redirects);
return &istr;
}
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects)
void assertResponseIsOk(const String & uri, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects)
{
auto status = response.getStatus();
@ -422,7 +81,7 @@ void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR
body.exceptions(std::ios::failbit);
body << istr.rdbuf();
throw HTTPException(code, request.getURI(), status, response.getReason(), body.str());
throw HTTPException(code, uri, status, response.getReason(), body.str());
}
}
@ -440,24 +99,4 @@ Exception HTTPException::makeExceptionMessage(
uri, static_cast<int>(http_status), reason, body);
}
/// Attaches HTTPSessionReuseTag to `session`.
/// Throws LOGICAL_ERROR if the session already carries attached data of any other type.
void markSessionForReuse(Poco::Net::HTTPSession & session)
{
    const auto & attached = session.sessionData();

    const bool has_foreign_data = !attached.empty() && !Poco::AnyCast<HTTPSessionReuseTag>(&attached);
    if (has_foreign_data)
    {
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "Data of an unexpected type ({}) is attached to the session", attached.type().name());
    }

    session.attachSessionData(HTTPSessionReuseTag{});
}
/// Convenience overload for a shared session pointer: forwards the pointed-to session
/// to the reference overload.
void markSessionForReuse(HTTPSessionPtr session)
{
    auto & pointee = *session;
    markSessionForReuse(pointee);
}
/// Overload for a session taken from a pool: marks the underlying session for reuse.
void markSessionForReuse(PooledHTTPSessionPtr session)
{
/// The dereferenced entry is explicitly cast to the Poco::Net::HTTPSession base so that
/// the call resolves to the reference overload.
markSessionForReuse(static_cast<Poco::Net::HTTPSession &>(*session));
}
}

View File

@ -7,9 +7,9 @@
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Common/PoolBase.h>
#include <Common/ProxyConfiguration.h>
#include <Poco/URIStreamFactory.h>
#include <Common/HTTPConnectionPool.h>
#include <Common/ProxyConfiguration.h>
#include <IO/ConnectionTimeouts.h>
@ -36,7 +36,7 @@ public:
HTTPException * clone() const override { return new HTTPException(*this); }
void rethrow() const override { throw *this; }
int getHTTPStatus() const { return http_status; }
Poco::Net::HTTPResponse::HTTPStatus getHTTPStatus() const { return http_status; }
private:
Poco::Net::HTTPResponse::HTTPStatus http_status{};
@ -52,55 +52,18 @@ private:
const char * className() const noexcept override { return "DB::HTTPException"; }
};
using PooledHTTPSessionPtr = PoolBase<Poco::Net::HTTPClientSession>::Entry; // SingleEndpointHTTPSessionPool::Entry
using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
/// If a session has this tag attached, it will be reused without calling `reset()` on it.
/// No pooled session has this tag attached right after being taken from a pool.
/// If the request and the response were fully written/read, the client code should add this tag
/// explicitly by calling `markSessionForReuse()`.
///
/// Note that an HTTP response may contain extra bytes after the last byte of the payload. Specifically,
/// when chunked encoding is used, there's an empty chunk at the end. Those extra bytes must also be
/// read before the session can be reused. So we usually put an `istr->ignore(INT64_MAX)` call
/// before `markSessionForReuse()`.
struct HTTPSessionReuseTag
{
};
void markSessionForReuse(Poco::Net::HTTPSession & session);
void markSessionForReuse(HTTPSessionPtr session);
void markSessionForReuse(PooledHTTPSessionPtr session);
void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout);
/// Create session object to perform requests and set required parameters.
HTTPSessionPtr makeHTTPSession(
HTTPConnectionGroupType group,
const Poco::URI & uri,
const ConnectionTimeouts & timeouts,
ProxyConfiguration proxy_config = {}
);
/// Like the previous function, creates a session, but takes it from a pool; works both with and without a proxy.
///
/// The max_connections_per_endpoint parameter makes it look like the pool size can be different for
/// different requests (whatever that means), but actually we just assign the endpoint's connection
/// pool size when we see the endpoint for the first time, then we never change it.
/// We should probably change how this configuration works, and how this pooling works in general:
/// * Make the per_endpoint_pool_size be a global server setting instead of per-disk or per-query.
/// * Have boolean per-disk/per-query settings for enabling/disabling pooling.
/// * Add a limit on the number of endpoints and the total number of sessions across all endpoints.
/// * Enable pooling by default everywhere. In particular StorageURL and StorageS3.
/// (Enabling it for StorageURL is scary without the previous item - the user may query lots of
/// different endpoints. So currently pooling is mainly used for S3.)
PooledHTTPSessionPtr makePooledHTTPSession(
const Poco::URI & uri,
const ConnectionTimeouts & timeouts,
size_t per_endpoint_pool_size,
bool wait_on_pool_size_limit = true,
ProxyConfiguration proxy_config = {});
bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status);
/** Used to receive response (response headers and possibly body)
@ -112,7 +75,6 @@ std::istream * receiveResponse(
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects);
void assertResponseIsOk(
const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false);
const String & uri, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false);
void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts);
}

View File

@ -1,5 +1,4 @@
#include <IO/LimitReadBuffer.h>
#include <Common/Exception.h>
@ -15,7 +14,7 @@ namespace ErrorCodes
bool LimitReadBuffer::nextImpl()
{
assert(position() >= in->position());
chassert(position() >= in->position());
/// Let underlying buffer calculate read bytes in `next()` call.
in->position() = position();
@ -39,20 +38,18 @@ bool LimitReadBuffer::nextImpl()
if (exact_limit && bytes != *exact_limit)
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF, got {} of {} bytes", bytes, *exact_limit);
/// Clearing the buffer with existing data.
set(in->position(), 0);
BufferBase::set(in->position(), 0, 0);
return false;
}
working_buffer = in->buffer();
if (limit - bytes < working_buffer.size())
working_buffer.resize(limit - bytes);
BufferBase::set(in->position(), std::min(in->available(), limit - bytes), 0);
return true;
}
LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_,
LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, size_t limit_, bool throw_exception_,
std::optional<size_t> exact_limit_, std::string exception_message_)
: ReadBuffer(in_ ? in_->position() : nullptr, 0)
, in(in_)
@ -62,24 +59,20 @@ LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, boo
, exact_limit(exact_limit_)
, exception_message(std::move(exception_message_))
{
assert(in);
chassert(in);
size_t remaining_bytes_in_buffer = in->buffer().end() - in->position();
if (remaining_bytes_in_buffer > limit)
remaining_bytes_in_buffer = limit;
working_buffer = Buffer(in->position(), in->position() + remaining_bytes_in_buffer);
BufferBase::set(in->position(), std::min(in->available(), limit), 0);
}
LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_,
LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, size_t limit_, bool throw_exception_,
std::optional<size_t> exact_limit_, std::string exception_message_)
: LimitReadBuffer(&in_, false, limit_, throw_exception_, exact_limit_, exception_message_)
{
}
LimitReadBuffer::LimitReadBuffer(std::unique_ptr<ReadBuffer> in_, UInt64 limit_, bool throw_exception_,
LimitReadBuffer::LimitReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t limit_, bool throw_exception_,
std::optional<size_t> exact_limit_, std::string exception_message_)
: LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exact_limit_, exception_message_)
{

View File

@ -13,22 +13,24 @@ namespace DB
class LimitReadBuffer : public ReadBuffer
{
public:
LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_,
LimitReadBuffer(ReadBuffer & in_, size_t limit_, bool throw_exception_,
std::optional<size_t> exact_limit_, std::string exception_message_ = {});
LimitReadBuffer(std::unique_ptr<ReadBuffer> in_, UInt64 limit_, bool throw_exception_, std::optional<size_t> exact_limit_,
LimitReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t limit_, bool throw_exception_, std::optional<size_t> exact_limit_,
std::string exception_message_ = {});
~LimitReadBuffer() override;
private:
ReadBuffer * in;
bool owns_in;
const bool owns_in;
UInt64 limit;
bool throw_exception;
std::optional<size_t> exact_limit;
std::string exception_message;
const size_t limit;
const bool throw_exception;
const std::optional<size_t> exact_limit;
const std::string exception_message;
LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::optional<size_t> exact_limit_, std::string exception_message_);
LoggerPtr log;
LimitReadBuffer(ReadBuffer * in_, bool owns, size_t limit_, bool throw_exception_, std::optional<size_t> exact_limit_, std::string exception_message_);
bool nextImpl() override;
};

View File

@ -92,7 +92,7 @@ size_t MMapReadBufferFromFileDescriptor::getFileSize()
return getSizeFromFileDescriptor(getFD(), getFileName());
}
size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &)
size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &) const
{
if (offset >= mapped.getLength())
return 0;

View File

@ -40,7 +40,7 @@ public:
size_t getFileSize() override;
size_t readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &) override;
size_t readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &) const override;
bool supportsReadAt() override { return true; }
};

View File

@ -63,21 +63,23 @@ public:
*/
bool next()
{
assert(!hasPendingData());
assert(position() <= working_buffer.end());
chassert(!hasPendingData());
chassert(position() <= working_buffer.end());
bytes += offset();
bool res = nextImpl();
if (!res)
{
working_buffer = Buffer(pos, pos);
}
else
{
pos = working_buffer.begin() + nextimpl_working_buffer_offset;
assert(position() != working_buffer.end());
pos = working_buffer.begin() + std::min(nextimpl_working_buffer_offset, working_buffer.size());
chassert(position() < working_buffer.end());
}
nextimpl_working_buffer_offset = 0;
assert(position() <= working_buffer.end());
chassert(position() <= working_buffer.end());
return res;
}

View File

@ -49,7 +49,7 @@ std::string ReadBufferFromFileDescriptor::getFileName() const
}
size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset)
size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) const
{
chassert(min_bytes <= max_bytes);
@ -265,7 +265,7 @@ bool ReadBufferFromFileDescriptor::checkIfActuallySeekable()
return res == 0 && S_ISREG(stat.st_mode);
}
size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &)
size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &) const
{
chassert(use_pread);
return readImpl(to, n, n, offset);

View File

@ -34,7 +34,7 @@ protected:
/// Doesn't seek (`offset` must match fd's position if !use_pread).
/// Stops after min_bytes or eof. Returns 0 if eof.
/// Thread safe.
size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset);
size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) const;
public:
explicit ReadBufferFromFileDescriptor(
@ -73,7 +73,7 @@ public:
bool checkIfActuallySeekable() override;
size_t readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &) override;
size_t readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &) const override;
bool supportsReadAt() override { return use_pread; }
};

View File

@ -5,52 +5,44 @@
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_READ_FROM_ISTREAM;
}
bool ReadBufferFromIStream::nextImpl()
{
if (eof)
return false;
chassert(internal_buffer.begin() != nullptr);
chassert(!internal_buffer.empty());
size_t bytes_read = 0;
char * read_to = internal_buffer.begin();
/// It is necessary to read in a loop, since socket usually returns only data available at the moment.
while (bytes_read < internal_buffer.size())
{
try
const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read);
if (bytes_read_last_time <= 0)
{
const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read);
if (bytes_read_last_time <= 0)
{
eof = true;
break;
}
eof = true;
break;
}
bytes_read += bytes_read_last_time;
read_to += bytes_read_last_time;
}
catch (...)
{
throw Exception(
ErrorCodes::CANNOT_READ_FROM_ISTREAM,
"Cannot read from istream at offset {}: {}",
count(),
getCurrentExceptionMessage(/*with_stacktrace=*/true));
}
bytes_read += bytes_read_last_time;
read_to += bytes_read_last_time;
}
if (bytes_read)
{
working_buffer = internal_buffer;
working_buffer.resize(bytes_read);
}
return bytes_read;
}
ReadBufferFromIStream::ReadBufferFromIStream(std::istream & istr_, size_t size)
: BufferWithOwnMemory<ReadBuffer>(size), istr(istr_), stream_buf(dynamic_cast<Poco::Net::HTTPBasicStreamBuf &>(*istr.rdbuf()))
: BufferWithOwnMemory<ReadBuffer>(size)
, istr(istr_)
, stream_buf(dynamic_cast<Poco::Net::HTTPBasicStreamBuf &>(*istr.rdbuf()))
{
}

View File

@ -34,61 +34,6 @@ namespace ProfileEvents
extern const Event RemoteReadThrottlerSleepMicroseconds;
}
namespace
{
/// Returns the pooled HTTP session that backs the body stream of a GetObject result.
/// A null entry is returned when the body is backed by a non-pooled session or by the
/// default AWS stream; any other stream type is reported as a LOGICAL_ERROR.
DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_result)
{
if (auto * session_aware_stream = dynamic_cast<DB::S3::SessionAwareIOStream<DB::PooledHTTPSessionPtr> *>(&read_result.GetBody()))
return static_cast<DB::PooledHTTPSessionPtr &>(session_aware_stream->getSession());
/// Non-pooled session: there is no pool entry to hand back.
if (dynamic_cast<DB::S3::SessionAwareIOStream<DB::HTTPSessionPtr> *>(&read_result.GetBody()))
return {};
/// Accept the result from the S3 mock in gtest_writebuffer_s3.cpp
if (dynamic_cast<Aws::Utils::Stream::DefaultUnderlyingStream *>(&read_result.GetBody()))
return {};
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered");
}
/// Calls `reset()` on the pooled session behind `read_result`, if there is one.
void resetSession(Aws::S3::Model::GetObjectResult & read_result)
{
if (auto session = getSession(read_result); !session.isNull())
{
auto & http_session = static_cast<Poco::Net::HTTPClientSession &>(*session);
http_session.reset();
}
}
/// Decides what to do with the session of a finished (or abandoned) GetObject request:
/// reset it when the range was not read completely or when the session goes through a proxy,
/// otherwise mark it for reuse. Does nothing when there is no result.
void resetSessionIfNeeded(bool read_all_range_successfully, std::optional<Aws::S3::Model::GetObjectResult> & read_result)
{
if (!read_result)
return;
if (!read_all_range_successfully)
{
/// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object, this delete
/// operation will hang until the GetObject session's idle timeout expires. So we have to call `reset()` on the GetObject session immediately.
resetSession(*read_result);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions);
}
else if (auto session = getSession(*read_result); !session.isNull())
{
if (!session->getProxyHost().empty())
{
/// Reset proxified sessions because proxy can change for every request. See ProxyConfigurationResolver.
resetSession(*read_result);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions);
}
else
{
DB::markSessionForReuse(session);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
}
}
}
}
namespace DB
{
namespace ErrorCodes
@ -228,7 +173,7 @@ bool ReadBufferFromS3::nextImpl()
}
size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & progress_callback)
size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & progress_callback) const
{
size_t initial_n = n;
size_t sleep_time_with_backoff_milliseconds = 100;
@ -240,29 +185,6 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ReadBufferFromS3Microseconds);
std::optional<Aws::S3::Model::GetObjectResult> result;
/// Connection is reusable if we've read the full response.
bool session_is_reusable = false;
SCOPE_EXIT(
{
if (!result.has_value())
return;
if (session_is_reusable)
{
auto session = getSession(*result);
if (!session.isNull())
{
DB::markSessionForReuse(session);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
}
else
session_is_reusable = false;
}
if (!session_is_reusable)
{
resetSession(*result);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions);
}
});
try
{
@ -276,9 +198,8 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons
if (read_settings.remote_throttler)
read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds);
/// Read remaining bytes after the end of the payload, see HTTPSessionReuseTag.
/// Read remaining bytes after the end of the payload
istr.ignore(INT64_MAX);
session_is_reusable = true;
}
catch (Poco::Exception & e)
{
@ -451,21 +372,8 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess()
return false;
}
ReadBufferFromS3::~ReadBufferFromS3()
{
try
{
resetSessionIfNeeded(readAllRangeSuccessfully(), read_result);
}
catch (...)
{
tryLogCurrentException(log);
}
}
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize(size_t attempt)
{
resetSessionIfNeeded(readAllRangeSuccessfully(), read_result);
read_all_range_successfully = false;
/**
@ -534,10 +442,6 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t attempt, si
}
}
bool ReadBufferFromS3::readAllRangeSuccessfully() const
{
return read_until_position ? offset == read_until_position : read_all_range_successfully;
}
}
#endif

View File

@ -55,7 +55,7 @@ public:
bool restricted_seek_ = false,
std::optional<size_t> file_size = std::nullopt);
~ReadBufferFromS3() override;
~ReadBufferFromS3() override = default;
bool nextImpl() override;
@ -74,7 +74,7 @@ public:
String getFileName() const override { return bucket + "/" + key; }
size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & progress_callback) override;
size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & progress_callback) const override;
bool supportsReadAt() override { return true; }
@ -90,8 +90,6 @@ private:
Aws::S3::Model::GetObjectResult sendRequest(size_t attempt, size_t range_begin, std::optional<size_t> range_end_incl) const;
bool readAllRangeSuccessfully() const;
ReadSettings read_settings;
bool use_external_buffer;

View File

@ -122,7 +122,7 @@ struct ReadSettings
// Resource to be used during reading
ResourceLink resource_link;
size_t http_max_tries = 1;
size_t http_max_tries = 10;
size_t http_retry_initial_backoff_ms = 100;
size_t http_retry_max_backoff_ms = 1600;
bool http_skip_not_found_url_for_globs = true;

File diff suppressed because it is too large Load Diff

View File

@ -20,7 +20,6 @@
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Poco/URIStreamFactory.h>
#include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h>
#include "config.h"
#include <Common/config_version.h>
@ -30,44 +29,19 @@
namespace DB
{
template <typename TSessionFactory>
class UpdatableSession
class ReadWriteBufferFromHTTP : public SeekableReadBuffer, public WithFileName, public WithFileSize
{
public:
using SessionPtr = typename TSessionFactory::SessionType;
explicit UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr<TSessionFactory> session_factory_);
SessionPtr getSession();
void updateSession(const Poco::URI & uri);
/// Thread safe.
SessionPtr createDetachedSession(const Poco::URI & uri);
std::shared_ptr<UpdatableSession<TSessionFactory>> clone(const Poco::URI & uri);
/// Information from HTTP response header.
struct HTTPFileInfo
{
// nullopt if the server doesn't report it.
std::optional<size_t> file_size;
std::optional<time_t> last_modified;
bool seekable = false;
};
private:
SessionPtr session;
UInt64 redirects{0};
UInt64 max_redirects;
Poco::URI initial_uri;
std::shared_ptr<TSessionFactory> session_factory;
};
/// Information from HTTP response header.
struct HTTPFileInfo
{
// nullopt if the server doesn't report it.
std::optional<size_t> file_size;
std::optional<time_t> last_modified;
bool seekable = false;
};
namespace detail
{
/// Byte range, including right bound [begin, end].
struct HTTPRange
{
@ -75,218 +49,210 @@ namespace detail
std::optional<size_t> end;
};
template <typename UpdatableSessionPtr>
class ReadWriteBufferFromHTTPBase : public SeekableReadBuffer, public WithFileName, public WithFileSize
struct CallResult
{
protected:
Poco::URI uri;
std::string method;
std::string content_encoding;
HTTPSessionPtr session;
std::istream * response_stream = nullptr;
UpdatableSessionPtr session;
std::istream * istr; /// owned by session
std::unique_ptr<ReadBuffer> impl;
std::function<void(std::ostream &)> out_stream_callback;
const Poco::Net::HTTPBasicCredentials & credentials;
std::vector<Poco::Net::HTTPCookie> cookies;
HTTPHeaderEntries http_header_entries;
const RemoteHostFilter * remote_host_filter = nullptr;
std::function<void(size_t)> next_callback;
/// Takes ownership of the session and remembers the response stream.
/// `session_` is an rvalue reference, but as a named parameter it is an lvalue,
/// so it has to be moved explicitly; otherwise the shared_ptr is copied.
CallResult(HTTPSessionPtr && session_, std::istream & response_stream_)
    : session(std::move(session_))
    , response_stream(&response_stream_)
{}
CallResult(CallResult &&) = default;
CallResult & operator= (CallResult &&) = default;
size_t buffer_size;
bool use_external_buffer;
size_t offset_from_begin_pos = 0;
HTTPRange read_range;
std::optional<HTTPFileInfo> file_info;
/// Delayed exception in case retries with partial content are not satisfiable.
std::exception_ptr exception;
bool retry_with_range_header = false;
/// In case of redirects, save result uri to use it if we retry the request.
std::optional<Poco::URI> saved_uri_redirect;
bool http_skip_not_found_url;
ReadSettings settings;
LoggerPtr log;
ProxyConfiguration proxy_config;
bool withPartialContent(const HTTPRange & range) const;
size_t getOffset() const;
void prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional<HTTPRange> range) const;
std::istream * callImpl(UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info = false);
size_t getFileSize() override;
bool supportsReadAt() override;
bool checkIfActuallySeekable() override;
String getFileName() const override;
enum class InitializeError
{
RETRYABLE_ERROR,
/// If error is not retriable, `exception` variable must be set.
NON_RETRYABLE_ERROR,
/// Allows to skip not found urls for globs
SKIP_NOT_FOUND_URL,
NONE,
};
InitializeError initialization_error = InitializeError::NONE;
private:
void getHeadResponse(Poco::Net::HTTPResponse & response);
void setupExternalBuffer();
public:
using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>;
explicit ReadWriteBufferFromHTTPBase(
UpdatableSessionPtr session_,
Poco::URI uri_,
const Poco::Net::HTTPBasicCredentials & credentials_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const ReadSettings & settings_ = {},
HTTPHeaderEntries http_header_entries_ = {},
const RemoteHostFilter * remote_host_filter_ = nullptr,
bool delay_initialization = false,
bool use_external_buffer_ = false,
bool http_skip_not_found_url_ = false,
std::optional<HTTPFileInfo> file_info_ = std::nullopt,
ProxyConfiguration proxy_config_ = {});
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false);
void call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false);
/**
* Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and
* saves exception into `exception` variable. In case url is not found and skip_not_found_url == true,
* sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws.
*/
void initialize();
bool nextImpl() override;
size_t readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> & progress_callback) override;
off_t getPosition() override;
off_t seek(off_t offset_, int whence) override;
void setReadUntilPosition(size_t until) override;
void setReadUntilEnd() override;
bool supportsRightBoundedReads() const override;
// If true, if we destroy impl now, no work was wasted. Just for metrics.
bool atEndOfRequestedRangeGuess();
std::string getResponseCookie(const std::string & name, const std::string & def) const;
/// Set function to call on each nextImpl, useful when you need to track
/// progress.
/// NOTE: parameter on each call is not incremental -- it's all bytes count
/// passed through the buffer
void setNextCallback(NextCallback next_callback_);
const std::string & getCompressionMethod() const;
std::optional<time_t> tryGetLastModificationTime();
HTTPFileInfo getFileInfo();
HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin);
std::unique_ptr<ReadBuffer> transformToReadBuffer(size_t buf_size) &&;
};
}
class SessionFactory
{
public:
explicit SessionFactory(const ConnectionTimeouts & timeouts_, ProxyConfiguration proxy_config_ = {});
const HTTPConnectionGroupType connection_group;
const Poco::URI initial_uri;
const std::string method;
const ProxyConfiguration proxy_config;
const ReadSettings read_settings;
const ConnectionTimeouts timeouts;
using SessionType = HTTPSessionPtr;
const Poco::Net::HTTPBasicCredentials & credentials;
const RemoteHostFilter * remote_host_filter;
SessionType buildNewSession(const Poco::URI & uri);
private:
ConnectionTimeouts timeouts;
ProxyConfiguration proxy_config;
};
const size_t buffer_size;
const size_t max_redirects;
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<SessionFactory>>>
{
using SessionType = UpdatableSession<SessionFactory>;
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<SessionType>>;
const bool use_external_buffer;
const bool http_skip_not_found_url;
std::function<void(std::ostream &)> out_stream_callback;
Poco::URI current_uri;
size_t redirects = 0;
std::string content_encoding;
std::unique_ptr<ReadBuffer> impl;
std::vector<Poco::Net::HTTPCookie> cookies;
HTTPHeaderEntries http_header_entries;
std::function<void(size_t)> next_callback;
size_t offset_from_begin_pos = 0;
HTTPRange read_range;
std::optional<HTTPFileInfo> file_info;
LoggerPtr log;
bool withPartialContent() const;
void prepareRequest(Poco::Net::HTTPRequest & request, std::optional<HTTPRange> range) const;
void doWithRetries(std::function<void()> && callable, std::function<void()> on_retry = nullptr, bool mute_logging = false) const;
CallResult callImpl(
Poco::Net::HTTPResponse & response,
const Poco::URI & uri_,
const std::string & method_,
const std::optional<HTTPRange> & range,
bool allow_redirects) const;
CallResult callWithRedirects(
Poco::Net::HTTPResponse & response,
const String & method_,
const std::optional<HTTPRange> & range);
std::unique_ptr<ReadBuffer> initialize();
size_t getFileSize() override;
bool supportsReadAt() override;
bool checkIfActuallySeekable() override;
String getFileName() const override;
void getHeadResponse(Poco::Net::HTTPResponse & response);
void setupExternalBuffer();
size_t getOffset() const;
// If true, if we destroy impl now, no work was wasted. Just for metrics.
bool atEndOfRequestedRangeGuess();
public:
using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>;
ReadWriteBufferFromHTTP(
Poco::URI uri_,
const HTTPConnectionGroupType & connection_group_,
const Poco::URI & uri_,
const std::string & method_,
OutStreamCallback out_stream_callback_,
const ConnectionTimeouts & timeouts,
const Poco::Net::HTTPBasicCredentials & credentials_,
const UInt64 max_redirects = 0,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const ReadSettings & settings_ = {},
const HTTPHeaderEntries & http_header_entries_ = {},
const RemoteHostFilter * remote_host_filter_ = nullptr,
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false,
std::optional<HTTPFileInfo> file_info_ = std::nullopt,
ProxyConfiguration proxy_config_ = {});
};
class PooledSessionFactory
{
public:
explicit PooledSessionFactory(
const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_);
using SessionType = PooledHTTPSessionPtr;
/// Thread safe.
SessionType buildNewSession(const Poco::URI & uri);
private:
ConnectionTimeouts timeouts;
size_t per_endpoint_pool_size;
};
using PooledSessionFactoryPtr = std::shared_ptr<PooledSessionFactory>;
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<PooledSessionFactory>>>
{
using SessionType = UpdatableSession<PooledSessionFactory>;
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<SessionType>>;
public:
explicit PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_,
OutStreamCallback out_stream_callback_,
ProxyConfiguration proxy_config_,
ReadSettings read_settings_,
ConnectionTimeouts timeouts_,
const Poco::Net::HTTPBasicCredentials & credentials_,
const RemoteHostFilter * remote_host_filter_,
size_t buffer_size_,
const UInt64 max_redirects,
PooledSessionFactoryPtr session_factory);
size_t max_redirects_,
OutStreamCallback out_stream_callback_,
bool use_external_buffer_,
bool http_skip_not_found_url_,
HTTPHeaderEntries http_header_entries_,
bool delay_initialization,
std::optional<HTTPFileInfo> file_info_);
bool nextImpl() override;
size_t readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> & progress_callback) const override;
off_t seek(off_t offset_, int whence) override;
void setReadUntilPosition(size_t until) override;
void setReadUntilEnd() override;
bool supportsRightBoundedReads() const override;
off_t getPosition() override;
std::string getResponseCookie(const std::string & name, const std::string & def) const;
/// Set a function to call on each nextImpl; useful when you need to track
/// progress.
/// NOTE: the parameter passed on each call is not an increment -- it is the
/// total count of bytes passed through the buffer.
void setNextCallback(NextCallback next_callback_);
const std::string & getCompressionMethod() const;
std::optional<time_t> tryGetLastModificationTime();
HTTPFileInfo getFileInfo();
static HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin);
};
using ReadWriteBufferFromHTTPPtr = std::unique_ptr<ReadWriteBufferFromHTTP>;
extern template class UpdatableSession<SessionFactory>;
extern template class UpdatableSession<PooledSessionFactory>;
extern template class detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<SessionFactory>>>;
extern template class detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<PooledSessionFactory>>>;
class BuilderRWBufferFromHTTP
{
Poco::URI uri;
std::string method = Poco::Net::HTTPRequest::HTTP_GET;
HTTPConnectionGroupType connection_group = HTTPConnectionGroupType::HTTP;
ProxyConfiguration proxy_config{};
ReadSettings read_settings{};
ConnectionTimeouts timeouts{};
const RemoteHostFilter * remote_host_filter = nullptr;
size_t buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
size_t max_redirects = 0;
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = nullptr;
bool use_external_buffer = false;
bool http_skip_not_found_url = false;
HTTPHeaderEntries http_header_entries{};
bool delay_initialization = true;
public:
BuilderRWBufferFromHTTP(Poco::URI uri_)
: uri(uri_)
{}
#define setterMember(name, member) \
BuilderRWBufferFromHTTP & name(decltype(BuilderRWBufferFromHTTP::member) arg_##member) \
{ \
member = std::move(arg_##member); \
return *this; \
}
setterMember(withConnectionGroup, connection_group)
setterMember(withMethod, method)
setterMember(withProxy, proxy_config)
setterMember(withSettings, read_settings)
setterMember(withTimeouts, timeouts)
setterMember(withHostFilter, remote_host_filter)
setterMember(withBufSize, buffer_size)
setterMember(withRedirects, max_redirects)
setterMember(withOutCallback, out_stream_callback)
setterMember(withHeaders, http_header_entries)
setterMember(withExternalBuf, use_external_buffer)
setterMember(withDelayInit, delay_initialization)
setterMember(withSkipNotFound, http_skip_not_found_url)
#undef setterMember
ReadWriteBufferFromHTTPPtr create(const Poco::Net::HTTPBasicCredentials & credentials_)
{
return std::make_unique<ReadWriteBufferFromHTTP>(
connection_group,
uri,
method,
proxy_config,
read_settings,
timeouts,
credentials_,
remote_host_filter,
buffer_size,
max_redirects,
out_stream_callback,
use_external_buffer,
http_skip_not_found_url,
http_header_entries,
delay_initialization,
/*file_info_=*/ std::nullopt);
}
};
}

View File

@ -1,5 +1,4 @@
#include <Poco/Timespan.h>
#include "Common/DNSResolver.h"
#include "config.h"
#if USE_AWS_S3
@ -147,9 +146,7 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio
.withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000))
.withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000))
.withTCPKeepAliveTimeout(Poco::Timespan(
client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0))
.withHTTPKeepAliveTimeout(Poco::Timespan(
client_configuration.http_keep_alive_timeout_ms * 1000)); /// flag indicating whether keep-alive is enabled is set to each session upon creation
client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0));
}
PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration)
@ -164,8 +161,6 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config
, get_request_throttler(client_configuration.get_request_throttler)
, put_request_throttler(client_configuration.put_request_throttler)
, extra_headers(client_configuration.extra_headers)
, http_connection_pool_size(client_configuration.http_connection_pool_size)
, wait_on_pool_size_limit(client_configuration.wait_on_pool_size_limit)
{
}
@ -308,12 +303,8 @@ void PocoHTTPClient::makeRequestInternal(
Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
{
/// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session.
const auto request_configuration = per_request_configuration();
if (http_connection_pool_size)
makeRequestInternalImpl<true>(request, request_configuration, response, readLimiter, writeLimiter);
else
makeRequestInternalImpl<false>(request, request_configuration, response, readLimiter, writeLimiter);
makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter);
}
String getMethod(const Aws::Http::HttpRequest & request)
@ -335,7 +326,6 @@ String getMethod(const Aws::Http::HttpRequest & request)
}
}
template <bool pooled>
void PocoHTTPClient::makeRequestInternalImpl(
Aws::Http::HttpRequest & request,
const DB::ProxyConfiguration & proxy_configuration,
@ -343,8 +333,6 @@ void PocoHTTPClient::makeRequestInternalImpl(
Aws::Utils::RateLimits::RateLimiterInterface *,
Aws::Utils::RateLimits::RateLimiterInterface *) const
{
using SessionPtr = std::conditional_t<pooled, PooledHTTPSessionPtr, HTTPSessionPtr>;
LoggerPtr log = getLogger("AWSClient");
auto uri = request.GetUri().GetURIString();
@ -396,40 +384,17 @@ void PocoHTTPClient::makeRequestInternalImpl(
for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt)
{
Poco::URI target_uri(uri);
SessionPtr session;
if (!proxy_configuration.host.empty())
{
if (enable_s3_requests_logging)
LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri);
/// Reverse proxy can replace host header with resolved ip address instead of host name.
/// This can lead to request signature difference on S3 side.
if constexpr (pooled)
session = makePooledHTTPSession(
target_uri,
getTimeouts(method, first_attempt, /*first_byte*/ true),
http_connection_pool_size,
wait_on_pool_size_limit,
proxy_configuration);
else
session = makeHTTPSession(
target_uri,
getTimeouts(method, first_attempt, /*first_byte*/ true),
proxy_configuration);
}
else
{
if constexpr (pooled)
session = makePooledHTTPSession(
target_uri,
getTimeouts(method, first_attempt, /*first_byte*/ true),
http_connection_pool_size,
wait_on_pool_size_limit);
else
session = makeHTTPSession(
target_uri,
getTimeouts(method, first_attempt, /*first_byte*/ true));
}
if (enable_s3_requests_logging && !proxy_configuration.isEmpty())
LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri);
auto group = for_disk_s3 ? HTTPConnectionGroupType::DISK : HTTPConnectionGroupType::STORAGE;
auto session = makeHTTPSession(
group,
target_uri,
getTimeouts(method, first_attempt, /*first_byte*/ true),
proxy_configuration);
/// In case of error this address will be written to logs
request.SetResolvedRemoteHost(session->getResolvedAddress());
@ -612,10 +577,6 @@ void PocoHTTPClient::makeRequestInternalImpl(
response->SetClientErrorMessage(getCurrentExceptionMessage(false));
addMetric(request, S3MetricType::Errors);
/// Probably this is socket timeout or something more or less related to DNS
/// Let's just remove this host from DNS cache to be more safe
DNSResolver::instance().removeHostFromCache(Poco::URI(uri).getHost());
}
}

View File

@ -13,7 +13,7 @@
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <IO/HTTPHeaderEntries.h>
#include <IO/S3/SessionAwareIOStream.h>
#include <IO/SessionAwareIOStream.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/core/http/HttpClient.h>
@ -49,12 +49,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
ThrottlerPtr put_request_throttler;
HTTPHeaderEntries extra_headers;
/// Not a client parameter in terms of HTTP, and we won't send it to the server. Used internally to determine when a connection has to be re-established.
uint32_t http_keep_alive_timeout_ms = 0;
/// Zero means pooling will not be used.
size_t http_connection_pool_size = 0;
/// See PoolBase::BehaviourOnLimit
bool wait_on_pool_size_limit = true;
bool s3_use_adaptive_timeouts = true;
std::function<void(const DB::ProxyConfiguration &)> error_report;
@ -98,12 +93,6 @@ public:
);
}
void SetResponseBody(Aws::IStream & incoming_stream, PooledHTTPSessionPtr & session_) /// NOLINT
{
body_stream = Aws::Utils::Stream::ResponseStream(
Aws::New<SessionAwareIOStream<PooledHTTPSessionPtr>>("http result streambuf", session_, incoming_stream.rdbuf()));
}
void SetResponseBody(std::string & response_body) /// NOLINT
{
auto stream = Aws::New<std::stringstream>("http result buf", response_body); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
@ -163,7 +152,6 @@ private:
EnumSize,
};
template <bool pooled>
void makeRequestInternalImpl(
Aws::Http::HttpRequest & request,
const DB::ProxyConfiguration & proxy_configuration,
@ -196,9 +184,6 @@ protected:
ThrottlerPtr put_request_throttler;
const HTTPHeaderEntries extra_headers;
size_t http_connection_pool_size = 0;
bool wait_on_pool_size_limit = true;
};
}

View File

@ -82,7 +82,7 @@ public:
/// (e.g. next() or supportsReadAt()).
/// * Performance: there's no buffering. Each readBigAt() call typically translates into actual
/// IO operation (e.g. HTTP request). Don't use it for small adjacent reads.
virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function<bool(size_t m)> & /*progress_callback*/ = nullptr)
virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function<bool(size_t m)> & /*progress_callback*/ = nullptr) const
{ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method readBigAt() not implemented"); }
/// Checks if readBigAt() is allowed. May be slow, may throw (e.g. it may do an HTTP request or an fstat).

View File

@ -3,7 +3,7 @@
#include <iosfwd>
namespace DB::S3
namespace DB
{
/**
* Wrapper of IOStream to store response stream and corresponding HTTP session.

View File

@ -7,6 +7,7 @@ namespace DB
{
WriteBufferFromHTTP::WriteBufferFromHTTP(
const HTTPConnectionGroupType & connection_group,
const Poco::URI & uri,
const std::string & method,
const std::string & content_type,
@ -14,9 +15,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP(
const HTTPHeaderEntries & additional_headers,
const ConnectionTimeouts & timeouts,
size_t buffer_size_,
ProxyConfiguration proxy_configuration)
ProxyConfiguration proxy_configuration
)
: WriteBufferFromOStream(buffer_size_)
, session{makeHTTPSession(uri, timeouts, proxy_configuration)}
, session{makeHTTPSession(connection_group, uri, timeouts, proxy_configuration)}
, request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}
{
request.setHost(uri.getHost());

View File

@ -19,7 +19,8 @@ namespace DB
class WriteBufferFromHTTP : public WriteBufferFromOStream
{
public:
explicit WriteBufferFromHTTP(const Poco::URI & uri,
explicit WriteBufferFromHTTP(const HTTPConnectionGroupType & connection_group,
const Poco::URI & uri,
const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only
const std::string & content_type = "",
const std::string & content_encoding = "",

View File

@ -35,7 +35,7 @@ void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t
}
if (check_bytes && bytes > 0)
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF.");
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF, left to copy {} bytes.", bytes);
}
void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t bytes, std::function<void()> cancellation_hook, ThrottlerPtr throttler)

View File

@ -1362,7 +1362,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
size_t num_result_columns = result.size();
if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns)
throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match");
throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match (source: {} and result: {})", num_input_columns, num_result_columns);
if (add_casted_columns && mode != MatchColumnsMode::Name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name");

View File

@ -33,26 +33,26 @@ ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription()
});
return ColumnsDescription{
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname of the server executing the query."},
{"event_date", std::make_shared<DataTypeDate>(), "The date when the async insert happened."},
{"event_time", std::make_shared<DataTypeDateTime>(), "The date and time when the async insert finished execution."},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "The date and time when the async insert finished execution with microseconds precision."},
{"query", std::make_shared<DataTypeString>()},
{"database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"format", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"query_id", std::make_shared<DataTypeString>()},
{"bytes", std::make_shared<DataTypeUInt64>()},
{"rows", std::make_shared<DataTypeUInt64>()},
{"exception", std::make_shared<DataTypeString>()},
{"status", type_status},
{"data_kind", type_data_kind},
{"query", std::make_shared<DataTypeString>(), "Query string."},
{"database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "The name of the database the table is in."},
{"table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Table name."},
{"format", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Format name."},
{"query_id", std::make_shared<DataTypeString>(), "ID of the initial query."},
{"bytes", std::make_shared<DataTypeUInt64>(), "Number of inserted bytes."},
{"rows", std::make_shared<DataTypeUInt64>(), "Number of inserted rows."},
{"exception", std::make_shared<DataTypeString>(), "Exception message."},
{"status", type_status, "Status of the view. Values: 'Ok' = 1 — Successful insert, 'ParsingError' = 2 — Exception when parsing the data, 'FlushError' = 3 — Exception when flushing the data"},
{"data_kind", type_data_kind, "The status of the data. Value: 'Parsed' and 'Preprocessed'."},
{"flush_time", std::make_shared<DataTypeDateTime>()},
{"flush_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"flush_query_id", std::make_shared<DataTypeString>()},
{"timeout_milliseconds", std::make_shared<DataTypeUInt64>()},
{"flush_time", std::make_shared<DataTypeDateTime>(), "The date and time when the flush happened."},
{"flush_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "The date and time when the flush happened with microseconds precision."},
{"flush_query_id", std::make_shared<DataTypeString>(), "ID of the flush query."},
{"timeout_milliseconds", std::make_shared<DataTypeUInt64>(), "The adaptive timeout calculated for this entry."},
};
}

View File

@ -22,24 +22,24 @@ ColumnsDescription BackupLogElement::getColumnsDescription()
{
return ColumnsDescription
{
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"id", std::make_shared<DataTypeString>()},
{"name", std::make_shared<DataTypeString>()},
{"base_backup_name", std::make_shared<DataTypeString>()},
{"query_id", std::make_shared<DataTypeString>()},
{"status", std::make_shared<DataTypeEnum8>(getBackupStatusEnumValues())},
{"error", std::make_shared<DataTypeString>()},
{"start_time", std::make_shared<DataTypeDateTime>()},
{"end_time", std::make_shared<DataTypeDateTime>()},
{"num_files", std::make_shared<DataTypeUInt64>()},
{"total_size", std::make_shared<DataTypeUInt64>()},
{"num_entries", std::make_shared<DataTypeUInt64>()},
{"uncompressed_size", std::make_shared<DataTypeUInt64>()},
{"compressed_size", std::make_shared<DataTypeUInt64>()},
{"files_read", std::make_shared<DataTypeUInt64>()},
{"bytes_read", std::make_shared<DataTypeUInt64>()},
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname of the server executing the query."},
{"event_date", std::make_shared<DataTypeDate>(), "Date of the entry."},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "Time of the entry with microseconds precision."},
{"id", std::make_shared<DataTypeString>(), "Identifier of the backup or restore operation."},
{"name", std::make_shared<DataTypeString>(), "Name of the backup storage (the contents of the FROM or TO clause)."},
{"base_backup_name", std::make_shared<DataTypeString>(), "The name of base backup in case incremental one."},
{"query_id", std::make_shared<DataTypeString>(), "The ID of a query associated with a backup operation."},
{"status", std::make_shared<DataTypeEnum8>(getBackupStatusEnumValues()), "Operation status."},
{"error", std::make_shared<DataTypeString>(), "Error message of the failed operation (empty string for successful operations)."},
{"start_time", std::make_shared<DataTypeDateTime>(), "Start time of the operation."},
{"end_time", std::make_shared<DataTypeDateTime>(), "End time of the operation."},
{"num_files", std::make_shared<DataTypeUInt64>(), "Number of files stored in the backup."},
{"total_size", std::make_shared<DataTypeUInt64>(), "Total size of files stored in the backup."},
{"num_entries", std::make_shared<DataTypeUInt64>(), "Number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. It is not the same as num_files if it's an incremental backup or if it contains empty files or duplicates. The following is always true: num_entries <= num_files."},
{"uncompressed_size", std::make_shared<DataTypeUInt64>(), "Uncompressed size of the backup."},
{"compressed_size", std::make_shared<DataTypeUInt64>(), "Compressed size of the backup. If the backup is not stored as an archive it equals to uncompressed_size."},
{"files_read", std::make_shared<DataTypeUInt64>(), "Number of files read during the restore operation."},
{"bytes_read", std::make_shared<DataTypeUInt64>(), "Total size of files read during the restore operation."},
};
}

View File

@ -26,23 +26,23 @@ ColumnsDescription BlobStorageLogElement::getColumnsDescription()
return ColumnsDescription
{
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"event_date", std::make_shared<DataTypeDate>(), "Date of the event."},
{"event_time", std::make_shared<DataTypeDateTime>(), "Time of the event."},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "Time of the event with microseconds precision."},
{"event_type", event_enum_type},
{"event_type", event_enum_type, "Type of the event. Possible values: 'Upload', 'Delete', 'MultiPartUploadCreate', 'MultiPartUploadWrite', 'MultiPartUploadComplete', 'MultiPartUploadAbort'"},
{"query_id", std::make_shared<DataTypeString>()},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"thread_name", std::make_shared<DataTypeString>()},
{"query_id", std::make_shared<DataTypeString>(), "Identifier of the query associated with the event, if any."},
{"thread_id", std::make_shared<DataTypeUInt64>(), "Identifier of the thread performing the operation."},
{"thread_name", std::make_shared<DataTypeString>(), "Name of the thread performing the operation."},
{"disk_name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"bucket", std::make_shared<DataTypeString>()},
{"remote_path", std::make_shared<DataTypeString>()},
{"local_path", std::make_shared<DataTypeString>()},
{"data_size", std::make_shared<DataTypeUInt64>()},
{"disk_name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of the associated disk."},
{"bucket", std::make_shared<DataTypeString>(), "Name of the bucket."},
{"remote_path", std::make_shared<DataTypeString>(), "Path to the remote resource."},
{"local_path", std::make_shared<DataTypeString>(), "Path to the metadata file on the local system, which references the remote resource."},
{"data_size", std::make_shared<DataTypeUInt64>(), "Size of the data involved in the upload event."},
{"error", std::make_shared<DataTypeString>()},
{"error", std::make_shared<DataTypeString>(), "Error message associated with the event, if any."},
};
}

View File

@ -380,8 +380,6 @@ struct ContextSharedPart : boost::noncopyable
OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex);
OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex);
OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex);
/// The global pool of HTTP sessions for background fetches.
PooledSessionFactoryPtr fetches_session_factory TSA_GUARDED_BY(background_executors_mutex);
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml
@ -5039,11 +5037,6 @@ void Context::initializeBackgroundExecutorsIfNeeded()
);
LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size);
auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getServerSettings(), getSettingsRef());
/// The number of background fetches is limited by the number of threads in the background thread pool.
/// It doesn't make any sense to limit the number of connections per host any further.
shared->fetches_session_factory = std::make_shared<PooledSessionFactory>(timeouts, background_fetches_pool_size);
shared->fetch_executor = std::make_shared<OrdinaryBackgroundExecutor>
(
"Fetch",
@ -5097,12 +5090,6 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const
return shared->common_executor;
}
PooledSessionFactoryPtr Context::getCommonFetchesSessionFactory() const
{
SharedLockGuard lock(shared->background_executors_mutex);
return shared->fetches_session_factory;
}
IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const
{
callOnce(shared->readers_initialized, [&] {

View File

@ -206,9 +206,6 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr<TemporaryDataOnDiskScope>;
class PreparedSetsCache;
using PreparedSetsCachePtr = std::shared_ptr<PreparedSetsCache>;
class PooledSessionFactory;
using PooledSessionFactoryPtr = std::shared_ptr<PooledSessionFactory>;
class SessionTracker;
struct ServerSettings;
@ -1226,7 +1223,6 @@ public:
OrdinaryBackgroundExecutorPtr getMovesExecutor() const;
OrdinaryBackgroundExecutorPtr getFetchesExecutor() const;
OrdinaryBackgroundExecutorPtr getCommonExecutor() const;
PooledSessionFactoryPtr getCommonFetchesSessionFactory() const;
IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const;
#if USE_LIBURING

View File

@ -23,18 +23,18 @@ ColumnsDescription CrashLogElement::getColumnsDescription()
{
return ColumnsDescription
{
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"timestamp_ns", std::make_shared<DataTypeUInt64>()},
{"signal", std::make_shared<DataTypeInt32>()},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"query_id", std::make_shared<DataTypeString>()},
{"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"trace_full", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"version", std::make_shared<DataTypeString>()},
{"revision", std::make_shared<DataTypeUInt32>()},
{"build_id", std::make_shared<DataTypeString>()},
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "The hostname where the crash occurred."},
{"event_date", std::make_shared<DataTypeDate>(), "The date of the crash."},
{"event_time", std::make_shared<DataTypeDateTime>(), "The time of the crash."},
{"timestamp_ns", std::make_shared<DataTypeUInt64>(), "Timestamp of the event with nanoseconds."},
{"signal", std::make_shared<DataTypeInt32>(), "Signal number."},
{"thread_id", std::make_shared<DataTypeUInt64>(), "Thread ID."},
{"query_id", std::make_shared<DataTypeString>(), "Query ID."},
{"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process."},
{"trace_full", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process."},
{"version", std::make_shared<DataTypeString>(), "ClickHouse server version."},
{"revision", std::make_shared<DataTypeUInt32>(), "ClickHouse server revision."},
{"build_id", std::make_shared<DataTypeString>(), "BuildID that is generated by compiler."},
};
}

View File

@ -44,6 +44,11 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const
{
return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
}
catch (const DB::NetException &)
{
/// Avoid "Host not found" exceptions
return false;
}
catch (const Poco::Net::NetException &)
{
/// Avoid "Host not found" exceptions

View File

@ -38,20 +38,20 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription()
return ColumnsDescription
{
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"query_id", std::make_shared<DataTypeString>()},
{"source_file_path", std::make_shared<DataTypeString>()},
{"file_segment_range", std::make_shared<DataTypeTuple>(types)},
{"total_requested_range", std::make_shared<DataTypeTuple>(types)},
{"key", std::make_shared<DataTypeString>()},
{"offset", std::make_shared<DataTypeUInt64>()},
{"size", std::make_shared<DataTypeUInt64>()},
{"read_type", std::make_shared<DataTypeString>()},
{"read_from_cache_attempted", std::make_shared<DataTypeUInt8>()},
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
{"read_buffer_id", std::make_shared<DataTypeString>()},
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname"},
{"event_date", std::make_shared<DataTypeDate>(), "Event date"},
{"event_time", std::make_shared<DataTypeDateTime>(), "Event time"},
{"query_id", std::make_shared<DataTypeString>(), "Id of the query"},
{"source_file_path", std::make_shared<DataTypeString>(), "File segment path on filesystem"},
{"file_segment_range", std::make_shared<DataTypeTuple>(types), "File segment range"},
{"total_requested_range", std::make_shared<DataTypeTuple>(types), "Full read range"},
{"key", std::make_shared<DataTypeString>(), "File segment key"},
{"offset", std::make_shared<DataTypeUInt64>(), "File segment offset"},
{"size", std::make_shared<DataTypeUInt64>(), "Read size"},
{"read_type", std::make_shared<DataTypeString>(), "Read type: READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, READ_FROM_FS_BYPASSING_CACHE"},
{"read_from_cache_attempted", std::make_shared<DataTypeUInt8>(), "Whether reading from cache was attempted"},
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>()), "Profile events collected while reading this file segment"},
{"read_buffer_id", std::make_shared<DataTypeString>(), "Internal implementation read buffer id"},
};
}

View File

@ -2111,8 +2111,7 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis
if (does_storage_support_prewhere && shouldMoveToPrewhere())
{
/// Execute row level filter in prewhere as a part of "move to prewhere" optimization.
analysis.prewhere_info
= std::make_shared<PrewhereInfo>(std::move(analysis.filter_info->actions), std::move(analysis.filter_info->column_name));
analysis.prewhere_info = std::make_shared<PrewhereInfo>(analysis.filter_info->actions, analysis.filter_info->column_name);
analysis.prewhere_info->prewhere_actions->projectInput(false);
analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column;
analysis.prewhere_info->need_filter = true;
@ -2122,8 +2121,8 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis
else
{
/// Add row level security actions to prewhere.
analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions);
analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name);
analysis.prewhere_info->row_level_filter = analysis.filter_info->actions;
analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name;
analysis.prewhere_info->row_level_filter->projectInput(false);
analysis.filter_info = nullptr;
}

View File

@ -11,6 +11,7 @@
#include <Common/CurrentMetrics.h>
#include <Common/FailPoint.h>
#include <Common/PageCache.h>
#include <Common/HostResolvePool.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Interpreters/Cache/FileCache.h>
#include <Interpreters/Context.h>
@ -333,10 +334,17 @@ BlockIO InterpreterSystemQuery::execute()
{
getContext()->checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE);
DNSResolver::instance().dropCache();
HostResolversPool::instance().dropCache();
/// Reinitialize clusters to update their resolved_addresses
system_context->reloadClusterConfig();
break;
}
case Type::DROP_CONNECTIONS_CACHE:
{
getContext()->checkAccess(AccessType::SYSTEM_DROP_CONNECTIONS_CACHE);
HTTPConnectionPools::instance().dropCache();
break;
}
case Type::DROP_MARK_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE);
system_context->clearMarkCache();
@ -1201,6 +1209,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
break;
}
case Type::DROP_DNS_CACHE:
case Type::DROP_CONNECTIONS_CACHE:
case Type::DROP_MARK_CACHE:
case Type::DROP_MMAP_CACHE:
case Type::DROP_QUERY_CACHE:

Some files were not shown because too many files have changed in this diff Show More