Merge branch 'master' into Azure_fix_stateless_tests

This commit is contained in:
Smita Kulkarni 2024-05-29 13:16:48 +02:00
commit 1b084fcc00
247 changed files with 3210 additions and 2404 deletions

View File

@ -37,6 +37,7 @@ Checks: [
'-cert-oop54-cpp',
'-cert-oop57-cpp',
'-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667
'-clang-analyzer-optin.performance.Padding',
'-clang-analyzer-unix.Malloc',
@ -94,6 +95,7 @@ Checks: [
'-modernize-pass-by-value',
'-modernize-return-braced-init-list',
'-modernize-use-auto',
'-modernize-use-constraints', # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
@ -121,7 +123,8 @@ Checks: [
'-readability-magic-numbers',
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-redundant-inline-specifier',
'-readability-redundant-inline-specifier', # useful but incompatible with __attribute__((always_inline)) (aka. ALWAYS_INLINE, base/base/defines.h).
# ALWAYS_INLINE only has an effect if combined with `inline`: https://godbolt.org/z/Eefd74qdM
'-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated
# initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, compiler warning `missing-field-initializers` requires to specify all members which are not in-struct
# initialized (example: s1 in struct S { String s1; String s2{};}; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly
@ -132,12 +135,7 @@ Checks: [
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',
'-zircon-*',
# This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872
'-modernize-use-constraints',
# https://github.com/abseil/abseil-cpp/issues/1667
'-clang-analyzer-optin.core.EnumCastOutOfRange'
'-zircon-*'
]
WarningsAsErrors: '*'
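The `readability-redundant-inline-specifier` entry above stays disabled for the ALWAYS_INLINE reason given in its comment, even though this merge strips redundant `inline` specifiers by hand throughout the files below. A minimal sketch of both halves of that comment (hypothetical type, not from the repository):

```cpp
#include <cstdint>

/// Hypothetical example, not from the ClickHouse sources.
struct Example
{
    /// Defined inside the class body, so implicitly inline; writing an
    /// explicit `inline` here is exactly what the check would flag.
    uint64_t implicitlyInline() const { return value; }

    /// The exception: always_inline only takes effect when combined with
    /// `inline`, which is why ALWAYS_INLINE-marked functions keep the keyword.
    __attribute__((always_inline)) inline uint64_t forcedInline() const { return value; }

    uint64_t value = 0;
};

int main() { return static_cast<int>(Example{}.implicitlyInline()); }
```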

View File

@ -11,6 +11,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py
- Backward Incompatible Change
- Build/Testing/Packaging Improvement
- Documentation (changelog entry is not required)
- Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
- Bug Fix (user-visible misbehavior in an official stable release)
- CI Fix or Improvement (changelog entry is not required)
- Not for changelog (changelog entry is not required)

View File

@ -86,7 +86,7 @@ public:
}
/// Return object into pool. Client must return same object that was borrowed.
inline void returnObject(T && object_to_return)
void returnObject(T && object_to_return)
{
{
std::lock_guard lock(objects_mutex);
@ -99,20 +99,20 @@ public:
}
/// Max pool size
inline size_t maxSize() const
size_t maxSize() const
{
return max_size;
}
/// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full.
inline size_t allocatedObjectsSize() const
size_t allocatedObjectsSize() const
{
std::lock_guard lock(objects_mutex);
return allocated_objects_size;
}
/// Returns allocatedObjectsSize == maxSize
inline bool isFull() const
bool isFull() const
{
std::lock_guard lock(objects_mutex);
return allocated_objects_size == max_size;
@ -120,7 +120,7 @@ public:
/// Borrowed objects size. If borrowedObjectsSize == allocatedObjectsSize and the pool is full,
/// then the client will wait during the borrowObject function call.
inline size_t borrowedObjectsSize() const
size_t borrowedObjectsSize() const
{
std::lock_guard lock(objects_mutex);
return borrowed_objects_size;
@ -129,7 +129,7 @@ public:
private:
template <typename FactoryFunc>
inline T allocateObjectForBorrowing(const std::unique_lock<std::mutex> &, FactoryFunc && func)
T allocateObjectForBorrowing(const std::unique_lock<std::mutex> &, FactoryFunc && func)
{
++allocated_objects_size;
++borrowed_objects_size;
@ -137,7 +137,7 @@ private:
return std::forward<FactoryFunc>(func)();
}
inline T borrowFromObjects(const std::unique_lock<std::mutex> &)
T borrowFromObjects(const std::unique_lock<std::mutex> &)
{
T dst;
detail::moveOrCopyIfThrow(std::move(objects.back()), dst);
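The comments above describe the pool's contract; the diff itself only drops redundant `inline` specifiers. For context, here is a minimal self-contained sketch of the same borrow/return pattern (an illustration under assumed semantics, not ClickHouse's actual `BorrowedObjectPool`):

```cpp
#include <condition_variable>
#include <mutex>
#include <utility>
#include <vector>

/// Toy pool, for illustration only. Clients borrow an object, use it, and
/// must return the very same object. Once allocated == max_size and every
/// object is borrowed, borrowers block until another client returns one.
template <typename T>
class ToyPool
{
public:
    explicit ToyPool(size_t max_size_) : max_size(max_size_) {}

    template <typename FactoryFunc>
    T borrowObject(FactoryFunc && factory)
    {
        std::unique_lock lock(mutex);

        if (!objects.empty())
            return takeBack();

        if (allocated_objects_size < max_size)
        {
            ++allocated_objects_size;
            return std::forward<FactoryFunc>(factory)();
        }

        /// Pool is full and everything is borrowed: wait for a return.
        condition.wait(lock, [this] { return !objects.empty(); });
        return takeBack();
    }

    void returnObject(T && object)
    {
        {
            std::lock_guard lock(mutex);
            objects.emplace_back(std::move(object));
        }
        condition.notify_one();
    }

private:
    /// Caller must hold the lock.
    T takeBack()
    {
        T object = std::move(objects.back());
        objects.pop_back();
        return object;
    }

    std::mutex mutex;
    std::condition_variable condition;
    std::vector<T> objects;
    size_t allocated_objects_size = 0;
    const size_t max_size;
};
```

A client would call `pool.borrowObject([]{ return Connection{}; })`, use the object, and hand the same object back via `returnObject`.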

View File

@ -1235,6 +1235,168 @@ Result:
- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
## toStartOfMillisecond
Rounds down a date with time to the start of the milliseconds.
**Syntax**
``` sql
toStartOfMillisecond(value, [timezone])
```
**Arguments**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Input value with sub-millisecond digits set to zero. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Examples**
Query without timezone:
``` sql
WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
SELECT toStartOfMillisecond(dt64);
```
Result:
``` text
┌────toStartOfMillisecond(dt64)─┐
│ 2020-01-01 10:20:30.999000000 │
└───────────────────────────────┘
```
Query with timezone:
``` sql
WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
SELECT toStartOfMillisecond(dt64, 'Asia/Istanbul');
```
Result:
``` text
┌─toStartOfMillisecond(dt64, 'Asia/Istanbul')─┐
│ 2020-01-01 12:20:30.999000000 │
└─────────────────────────────────────────────┘
```
**See also**
- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
## toStartOfMicrosecond
Rounds down a date with time to the start of the microseconds.
**Syntax**
``` sql
toStartOfMicrosecond(value, [timezone])
```
**Arguments**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Input value with sub-microsecond digits set to zero. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Examples**
Query without timezone:
``` sql
WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
SELECT toStartOfMicrosecond(dt64);
```
Result:
``` text
┌────toStartOfMicrosecond(dt64)─┐
│ 2020-01-01 10:20:30.999999000 │
└───────────────────────────────┘
```
Query with timezone:
``` sql
WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
SELECT toStartOfMicrosecond(dt64, 'Asia/Istanbul');
```
Result:
``` text
┌─toStartOfMicrosecond(dt64, 'Asia/Istanbul')─┐
│ 2020-01-01 12:20:30.999999000 │
└─────────────────────────────────────────────┘
```
**See also**
- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
## toStartOfNanosecond
Rounds down a date with time to the start of the nanoseconds.
**Syntax**
``` sql
toStartOfNanosecond(value, [timezone])
```
**Arguments**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Input value, truncated to the start of the nanosecond. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Examples**
Query without timezone:
``` sql
WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
SELECT toStartOfNanosecond(dt64);
```
Result:
``` text
┌─────toStartOfNanosecond(dt64)─┐
│ 2020-01-01 10:20:30.999999999 │
└───────────────────────────────┘
```
Query with timezone:
``` sql
WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
SELECT toStartOfNanosecond(dt64, 'Asia/Istanbul');
```
Result:
``` text
┌─toStartOfNanosecond(dt64, 'Asia/Istanbul')─┐
│ 2020-01-01 12:20:30.999999999 │
└────────────────────────────────────────────┘
```
**See also**
- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
## toStartOfFiveMinutes
Rounds down a date with time to the start of the five-minute interval.
@ -3953,6 +4115,43 @@ Result:
│ 2023-03-16 18:00:00.000 │
└─────────────────────────────────────────────────────────────────────────┘
```
## UTCTimestamp
Returns the current date and time at the moment of query analysis. The function is a constant expression.
:::note
This function gives the same result as `now('UTC')` would. It was added only for MySQL support; [`now`](#now-now) is the preferred usage.
:::
**Syntax**
```sql
UTCTimestamp()
```
Alias: `UTC_timestamp`.
**Returned value**
- Returns the current date and time at the moment of query analysis. [DateTime](../data-types/datetime.md).
**Example**
Query:
```sql
SELECT UTCTimestamp();
```
Result:
```response
┌──────UTCTimestamp()─┐
│ 2024-05-28 08:32:09 │
└─────────────────────┘
```
## timeDiff
Returns the difference between two dates or dates with time values. The difference is calculated in units of seconds. It is the same as `dateDiff` and was added only for MySQL support; `dateDiff` is preferred.

View File

@ -746,71 +746,6 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2);
└────────────────────────┴────────────────────────┘
```
## generateSnowflakeIDThreadMonotonic
Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotonicity across different simultaneous requests.
Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.
```
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|0|                         timestamp                           |
├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|                   |     machine_id    |    machine_seq_num    |
└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
```
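Reading the layout above, the three fields can be recovered with plain shifts and masks. A sketch under the assumed field widths from the diagram (the example ID is the one shown later in this section):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    /// Example ID taken from the documentation below.
    uint64_t id = 7199082832006627328ULL;

    /// Per the diagram: 1 zero bit, 41-bit ms timestamp, 10-bit machine id, 12-bit counter.
    uint64_t timestamp_ms = id >> 22;           /// drop machine_id + machine_seq_num
    uint64_t machine_id = (id >> 12) & 0x3FF;   /// 10 bits
    uint64_t seq = id & 0xFFF;                  /// 12 bits

    std::cout << "unix ms: " << timestamp_ms
              << " machine: " << machine_id
              << " seq: " << seq << '\n';
}
```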
**Syntax**
``` sql
generateSnowflakeIDThreadMonotonic([expr])
```
**Arguments**
- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
**Returned value**
A value of type UInt64.
**Example**
First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
``` sql
CREATE TABLE tab (id UInt64) ENGINE = Memory;
INSERT INTO tab SELECT generateSnowflakeIDThreadMonotonic();
SELECT * FROM tab;
```
Result:
```response
┌──────────────────id─┐
│ 7199082832006627328 │
└─────────────────────┘
```
**Example with multiple Snowflake IDs generated per row**
```sql
SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic(2);
┌─generateSnowflakeIDThreadMonotonic(1)─┬─generateSnowflakeIDThreadMonotonic(2)─┐
│ 7199082940311945216 │ 7199082940316139520 │
└───────────────────────────────────────┴───────────────────────────────────────┘
```
## snowflakeToDateTime
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.

View File

@ -79,8 +79,6 @@ ORDER BY ts, event_type;
│ 2020-01-03 00:00:00 │ imp │ │ 2 │ 0 │
└─────────────────────┴────────────┴─────────┴────────────┴──────┘
SET allow_experimental_alter_materialized_view_structure=1;
ALTER TABLE mv MODIFY QUERY
SELECT toStartOfDay(ts) ts, event_type, browser,
count() events_cnt,
@ -178,7 +176,6 @@ SELECT * FROM mv;
└───┘
```
```sql
set allow_experimental_alter_materialized_view_structure=1;
ALTER TABLE mv MODIFY QUERY SELECT a * 2 as a FROM src_table;
INSERT INTO src_table (a) VALUES (3), (4);
SELECT * FROM mv;

View File

@ -206,6 +206,32 @@ Enables background data distribution when inserting data into distributed tables
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### STOP LISTEN
Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.
```sql
SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```
- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
### START LISTEN
Allows new connections to be established on the specified protocols.
However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect.
```sql
SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```
## Managing MergeTree Tables
ClickHouse can manage background processes in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
@ -463,30 +489,16 @@ Will do sync syscall.
SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```
### UNLOAD PRIMARY KEY
## SYSTEM STOP LISTEN
Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.
Unload the primary keys for the given table or for all tables.
```sql
SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
SYSTEM UNLOAD PRIMARY KEY [db.]name
```
- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
## SYSTEM START LISTEN
Allows new connections to be established on the specified protocols.
However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect.
```sql
SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
SYSTEM UNLOAD PRIMARY KEY
```
## Managing Refreshable Materialized Views {#refreshable-materialized-views}
@ -495,7 +507,7 @@ Commands to control background tasks performed by [Refreshable Materialized View
Keep an eye on [`system.view_refreshes`](../../operations/system-tables/view_refreshes.md) while using them.
### SYSTEM REFRESH VIEW
### REFRESH VIEW
Trigger an immediate out-of-schedule refresh of a given view.
@ -503,7 +515,7 @@ Trigger an immediate out-of-schedule refresh of a given view.
SYSTEM REFRESH VIEW [db.]name
```
### SYSTEM STOP VIEW, SYSTEM STOP VIEWS
### STOP VIEW, STOP VIEWS
Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too.
@ -514,7 +526,7 @@ SYSTEM STOP VIEW [db.]name
SYSTEM STOP VIEWS
```
### SYSTEM START VIEW, SYSTEM START VIEWS
### START VIEW, START VIEWS
Enable periodic refreshing for the given view or all refreshable views. No immediate refresh is triggered.
@ -525,22 +537,10 @@ SYSTEM START VIEW [db.]name
SYSTEM START VIEWS
```
### SYSTEM CANCEL VIEW
### CANCEL VIEW
If there's a refresh in progress for the given view, interrupt and cancel it. Otherwise do nothing.
```sql
SYSTEM CANCEL VIEW [db.]name
```
### SYSTEM UNLOAD PRIMARY KEY
Unload the primary keys for the given table or for all tables.
```sql
SYSTEM UNLOAD PRIMARY KEY [db.]name
```
```sql
SYSTEM UNLOAD PRIMARY KEY
```

View File

@ -23,7 +23,7 @@ public:
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
static constexpr inline auto FORMAT = "RowBinary";
static constexpr auto FORMAT = "RowBinary";
const size_t keep_alive_timeout;
LoggerPtr log;

View File

@ -56,10 +56,10 @@ private:
std::condition_variable cond;
std::optional<ThreadFromGlobalPool> thread;
static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents.";
static inline constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative.";
static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics.";
static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics.";
static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents.";
static constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative.";
static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics.";
static constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics.";
};
}

View File

@ -341,7 +341,7 @@ public:
value[i] = Node::read(buf, arena);
}
inline std::optional<size_t> getBaseIndex(Data & data) const
std::optional<size_t> getBaseIndex(Data & data) const
{
if (data.value.size() == 0)
return {};

View File

@ -73,7 +73,7 @@ private:
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>;
inline bool singleFilter(const IColumn ** columns, size_t row_num) const
bool singleFilter(const IColumn ** columns, size_t row_num) const
{
const IColumn * filter_column = columns[num_arguments - 1];
@ -261,7 +261,7 @@ public:
filter_is_only_null = arguments.back()->onlyNull();
}
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
static bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
{
return assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num];
}

View File

@ -138,7 +138,7 @@ class QuantileTDigest
compress();
}
inline bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean)
bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean)
{
return l_mean == r_mean || (!std::isinf(l_mean) && !std::isinf(r_mean));
}

View File

@ -262,7 +262,7 @@ namespace detail
UInt64 count_big[BIG_SIZE];
/// Get value of quantile by index in array `count_big`.
static inline UInt16 indexInBigToValue(size_t i)
static UInt16 indexInBigToValue(size_t i)
{
return (i * BIG_PRECISION) + SMALL_THRESHOLD
+ (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// A small randomization so that it is not noticeable that all the values are even.

View File

@ -24,14 +24,14 @@ private:
std::unique_ptr<datasketches::update_theta_sketch> sk_update;
std::unique_ptr<datasketches::theta_union> sk_union;
inline datasketches::update_theta_sketch * getSkUpdate()
datasketches::update_theta_sketch * getSkUpdate()
{
if (!sk_update)
sk_update = std::make_unique<datasketches::update_theta_sketch>(datasketches::update_theta_sketch::builder().build());
return sk_update.get();
}
inline datasketches::theta_union * getSkUnion()
datasketches::theta_union * getSkUnion()
{
if (!sk_union)
sk_union = std::make_unique<datasketches::theta_union>(datasketches::theta_union::builder().build());

View File

@ -38,7 +38,7 @@ bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types);
template <>
struct UniqVariadicHash<false, false>
{
static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
{
UInt64 hash;
@ -65,7 +65,7 @@ struct UniqVariadicHash<false, false>
template <>
struct UniqVariadicHash<false, true>
{
static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
{
UInt64 hash;
@ -94,7 +94,7 @@ struct UniqVariadicHash<false, true>
template <>
struct UniqVariadicHash<true, false>
{
static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
{
const IColumn ** column = columns;
const IColumn ** columns_end = column + num_args;
@ -114,7 +114,7 @@ struct UniqVariadicHash<true, false>
template <>
struct UniqVariadicHash<true, true>
{
static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
{
const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns();
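The specializations above select an implementation from two compile-time flags (`is_exact`, `argument_is_tuple`). A toy sketch of that bool-parameterized specialization pattern, with stand-in hash logic rather than the real one:

```cpp
#include <cstdint>
#include <iostream>

/// Toy stand-in for the <is_exact, argument_is_tuple> dispatch: the primary
/// template is only declared, and each flag combination gets its own
/// specialization chosen at compile time.
template <bool is_exact, bool argument_is_tuple>
struct ToyVariadicHash;

template <>
struct ToyVariadicHash<false, false>
{
    static uint64_t apply(uint64_t x) { return x * 0x9E3779B97F4A7C15ULL; } /// cheap 64-bit mix
};

template <>
struct ToyVariadicHash<true, false>
{
    /// "Exact" flavor: pretend to produce a collision-safer value.
    static uint64_t apply(uint64_t x) { return x ^ (x >> 33); }
};

int main()
{
    std::cout << ToyVariadicHash<false, false>::apply(42) << '\n';
    std::cout << ToyVariadicHash<true, false>::apply(42) << '\n';
}
```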

View File

@ -105,14 +105,14 @@ private:
}
}
inline size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT
inline size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT
inline size_t mask() const { return buf_size() - 1; }
size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT
size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT
size_t mask() const { return buf_size() - 1; }
inline size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); }
size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); }
/// The value is divided by 2 ^ skip_degree
inline bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); }
bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); }
HashValue hash(Value key) const { return static_cast<HashValue>(Hash()(key)); }

View File

@ -173,13 +173,13 @@ private:
return arithmetic_function_clone;
}
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();

View File

@ -184,7 +184,7 @@ private:
return result_function;
}
inline QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const
QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const
{
return makeComparisonFunction(std::move(lhs_argument), std::move(rhs_argument), "equals");
}

View File

@ -215,7 +215,7 @@ public:
}
private:
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));

View File

@ -59,7 +59,7 @@ public:
}
}
private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
static void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);

View File

@ -108,7 +108,7 @@ public:
}
private:
static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
auto result_type = function_node.getResultType();

View File

@ -110,7 +110,7 @@ private:
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(),

View File

@ -156,7 +156,7 @@ public:
}
private:
static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
@ -165,7 +165,7 @@ private:
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
{
auto multiply_function_node = std::make_shared<FunctionNode>("multiply");
auto & multiply_arguments_nodes = multiply_function_node->getArguments().getNodes();

View File

@ -0,0 +1,124 @@
#pragma once
#include <IO/Operators.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Analyzer/FunctionNode.h>
namespace DB
{
class ExpressionsStack
{
public:
void push(const QueryTreeNodePtr & node)
{
if (node->hasAlias())
{
const auto & node_alias = node->getAlias();
alias_name_to_expressions[node_alias].push_back(node);
}
if (const auto * function = node->as<FunctionNode>())
{
if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->getFunctionName()))
++aggregate_functions_counter;
}
expressions.emplace_back(node);
}
void pop()
{
const auto & top_expression = expressions.back();
const auto & top_expression_alias = top_expression->getAlias();
if (!top_expression_alias.empty())
{
auto it = alias_name_to_expressions.find(top_expression_alias);
auto & alias_expressions = it->second;
alias_expressions.pop_back();
if (alias_expressions.empty())
alias_name_to_expressions.erase(it);
}
if (const auto * function = top_expression->as<FunctionNode>())
{
if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->getFunctionName()))
--aggregate_functions_counter;
}
expressions.pop_back();
}
[[maybe_unused]] const QueryTreeNodePtr & getRoot() const
{
return expressions.front();
}
const QueryTreeNodePtr & getTop() const
{
return expressions.back();
}
[[maybe_unused]] bool hasExpressionWithAlias(const std::string & alias) const
{
return alias_name_to_expressions.contains(alias);
}
bool hasAggregateFunction() const
{
return aggregate_functions_counter > 0;
}
QueryTreeNodePtr getExpressionWithAlias(const std::string & alias) const
{
auto expression_it = alias_name_to_expressions.find(alias);
if (expression_it == alias_name_to_expressions.end())
return {};
return expression_it->second.front();
}
[[maybe_unused]] size_t size() const
{
return expressions.size();
}
bool empty() const
{
return expressions.empty();
}
void dump(WriteBuffer & buffer) const
{
buffer << expressions.size() << '\n';
for (const auto & expression : expressions)
{
buffer << "Expression ";
buffer << expression->formatASTForErrorMessage();
const auto & alias = expression->getAlias();
if (!alias.empty())
buffer << " alias " << alias;
buffer << '\n';
}
}
[[maybe_unused]] String dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
private:
QueryTreeNodes expressions;
size_t aggregate_functions_counter = 0;
std::unordered_map<std::string, QueryTreeNodes> alias_name_to_expressions;
};
}
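One detail worth noting in `ExpressionsStack`: `hasAggregateFunction()` is O(1) because a counter is maintained on every push/pop instead of scanning the stack. A stripped-down, self-contained sketch of that bookkeeping technique (toy types, not the analyzer's):

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

/// Toy version of the counter bookkeeping: track on push/pop how many
/// stacked items are "aggregates" so the containment query is O(1).
class ToyExpressionsStack
{
public:
    void push(std::string name, bool is_aggregate)
    {
        if (is_aggregate)
            ++aggregate_functions_counter;
        items.emplace_back(std::move(name), is_aggregate);
    }

    void pop()
    {
        if (items.back().second)
            --aggregate_functions_counter;
        items.pop_back();
    }

    bool hasAggregateFunction() const { return aggregate_functions_counter > 0; }

private:
    std::vector<std::pair<std::string, bool>> items;
    size_t aggregate_functions_counter = 0;
};

int main()
{
    ToyExpressionsStack stack;
    stack.push("plus", false);
    assert(!stack.hasAggregateFunction());
    stack.push("sum", true);
    assert(stack.hasAggregateFunction());
    stack.pop();
    assert(!stack.hasAggregateFunction());
}
```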

View File

@ -0,0 +1,195 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Identifier.h>
namespace DB
{
/// Identifier lookup context
enum class IdentifierLookupContext : uint8_t
{
EXPRESSION = 0,
FUNCTION,
TABLE_EXPRESSION,
};
inline const char * toString(IdentifierLookupContext identifier_lookup_context)
{
switch (identifier_lookup_context)
{
case IdentifierLookupContext::EXPRESSION: return "EXPRESSION";
case IdentifierLookupContext::FUNCTION: return "FUNCTION";
case IdentifierLookupContext::TABLE_EXPRESSION: return "TABLE_EXPRESSION";
}
}
inline const char * toStringLowercase(IdentifierLookupContext identifier_lookup_context)
{
switch (identifier_lookup_context)
{
case IdentifierLookupContext::EXPRESSION: return "expression";
case IdentifierLookupContext::FUNCTION: return "function";
case IdentifierLookupContext::TABLE_EXPRESSION: return "table expression";
}
}
/** Structure that represents an identifier lookup during query analysis.
* Lookup can be in query expression, function, table context.
*/
struct IdentifierLookup
{
Identifier identifier;
IdentifierLookupContext lookup_context;
bool isExpressionLookup() const
{
return lookup_context == IdentifierLookupContext::EXPRESSION;
}
bool isFunctionLookup() const
{
return lookup_context == IdentifierLookupContext::FUNCTION;
}
bool isTableExpressionLookup() const
{
return lookup_context == IdentifierLookupContext::TABLE_EXPRESSION;
}
String dump() const
{
return identifier.getFullName() + ' ' + toString(lookup_context);
}
};
inline bool operator==(const IdentifierLookup & lhs, const IdentifierLookup & rhs)
{
return lhs.identifier.getFullName() == rhs.identifier.getFullName() && lhs.lookup_context == rhs.lookup_context;
}
[[maybe_unused]] inline bool operator!=(const IdentifierLookup & lhs, const IdentifierLookup & rhs)
{
return !(lhs == rhs);
}
struct IdentifierLookupHash
{
size_t operator()(const IdentifierLookup & identifier_lookup) const
{
return std::hash<std::string>()(identifier_lookup.identifier.getFullName()) ^ static_cast<uint8_t>(identifier_lookup.lookup_context);
}
};
enum class IdentifierResolvePlace : UInt8
{
NONE = 0,
EXPRESSION_ARGUMENTS,
ALIASES,
JOIN_TREE,
/// Valid only for table lookup
CTE,
/// Valid only for table lookup
DATABASE_CATALOG
};
inline const char * toString(IdentifierResolvePlace resolved_identifier_place)
{
switch (resolved_identifier_place)
{
case IdentifierResolvePlace::NONE: return "NONE";
case IdentifierResolvePlace::EXPRESSION_ARGUMENTS: return "EXPRESSION_ARGUMENTS";
case IdentifierResolvePlace::ALIASES: return "ALIASES";
case IdentifierResolvePlace::JOIN_TREE: return "JOIN_TREE";
case IdentifierResolvePlace::CTE: return "CTE";
case IdentifierResolvePlace::DATABASE_CATALOG: return "DATABASE_CATALOG";
}
}
struct IdentifierResolveResult
{
IdentifierResolveResult() = default;
QueryTreeNodePtr resolved_identifier;
IdentifierResolvePlace resolve_place = IdentifierResolvePlace::NONE;
bool resolved_from_parent_scopes = false;
[[maybe_unused]] bool isResolved() const
{
return resolve_place != IdentifierResolvePlace::NONE;
}
[[maybe_unused]] bool isResolvedFromParentScopes() const
{
return resolved_from_parent_scopes;
}
[[maybe_unused]] bool isResolvedFromExpressionArguments() const
{
return resolve_place == IdentifierResolvePlace::EXPRESSION_ARGUMENTS;
}
[[maybe_unused]] bool isResolvedFromAliases() const
{
return resolve_place == IdentifierResolvePlace::ALIASES;
}
[[maybe_unused]] bool isResolvedFromJoinTree() const
{
return resolve_place == IdentifierResolvePlace::JOIN_TREE;
}
[[maybe_unused]] bool isResolvedFromCTEs() const
{
return resolve_place == IdentifierResolvePlace::CTE;
}
void dump(WriteBuffer & buffer) const
{
if (!resolved_identifier)
{
buffer << "unresolved";
return;
}
buffer << resolved_identifier->formatASTForErrorMessage() << " place " << toString(resolve_place) << " resolved from parent scopes " << resolved_from_parent_scopes;
}
[[maybe_unused]] String dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
};
struct IdentifierResolveState
{
IdentifierResolveResult resolve_result;
bool cyclic_identifier_resolve = false;
};
struct IdentifierResolveSettings
{
/// Allow to check join tree during identifier resolution
bool allow_to_check_join_tree = true;
/// Allow to check CTEs during table identifier resolution
bool allow_to_check_cte = true;
/// Allow to check parent scopes during identifier resolution
bool allow_to_check_parent_scopes = true;
/// Allow to check database catalog during table identifier resolution
bool allow_to_check_database_catalog = true;
/// Allow to resolve subquery during identifier resolution
bool allow_to_resolve_subquery_during_identifier_resolution = true;
};
}
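The header pairs `IdentifierLookup` with `IdentifierLookupHash` so lookups can key an `unordered_map`, as the resolve-state cache in `IdentifierResolveScope` does. A self-contained illustration of the same keying scheme, with simplified stand-in types rather than the real ones:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

/// Illustration only: a key that pairs a name with a lookup context, hashed
/// the same way as IdentifierLookupHash above (hash of the name combined
/// with the numeric context value).
enum class LookupContext : uint8_t { EXPRESSION = 0, FUNCTION, TABLE_EXPRESSION };

struct Lookup
{
    std::string name;
    LookupContext context;
    bool operator==(const Lookup & rhs) const { return name == rhs.name && context == rhs.context; }
};

struct LookupHash
{
    size_t operator()(const Lookup & lookup) const
    {
        return std::hash<std::string>()(lookup.name) ^ static_cast<uint8_t>(lookup.context);
    }
};

int main()
{
    std::unordered_map<Lookup, std::string, LookupHash> cache;
    cache[{"x", LookupContext::EXPRESSION}] = "column x";
    cache[{"x", LookupContext::TABLE_EXPRESSION}] = "table x"; /// same name, different context -> distinct entry
    std::cout << cache.size() << '\n'; /// prints 2
}
```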

View File

@ -0,0 +1,184 @@
#include <Analyzer/Resolve/IdentifierResolveScope.h>
#include <Interpreters/Context.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/UnionNode.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_)
: scope_node(std::move(scope_node_))
, parent_scope(parent_scope_)
{
if (parent_scope)
{
subquery_depth = parent_scope->subquery_depth;
context = parent_scope->context;
projection_mask_map = parent_scope->projection_mask_map;
}
else
projection_mask_map = std::make_shared<std::map<IQueryTreeNode::Hash, size_t>>();
if (auto * union_node = scope_node->as<UnionNode>())
{
context = union_node->getContext();
}
else if (auto * query_node = scope_node->as<QueryNode>())
{
context = query_node->getContext();
group_by_use_nulls = context->getSettingsRef().group_by_use_nulls &&
(query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube());
}
if (context)
join_use_nulls = context->getSettingsRef().join_use_nulls;
else if (parent_scope)
join_use_nulls = parent_scope->join_use_nulls;
aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by;
}
[[maybe_unused]] const IdentifierResolveScope * IdentifierResolveScope::getNearestQueryScope() const
{
const IdentifierResolveScope * scope_to_check = this;
while (scope_to_check != nullptr)
{
if (scope_to_check->scope_node->getNodeType() == QueryTreeNodeType::QUERY)
break;
scope_to_check = scope_to_check->parent_scope;
}
return scope_to_check;
}
IdentifierResolveScope * IdentifierResolveScope::getNearestQueryScope()
{
IdentifierResolveScope * scope_to_check = this;
while (scope_to_check != nullptr)
{
if (scope_to_check->scope_node->getNodeType() == QueryTreeNodeType::QUERY)
break;
scope_to_check = scope_to_check->parent_scope;
}
return scope_to_check;
}
AnalysisTableExpressionData & IdentifierResolveScope::getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node)
{
auto it = table_expression_node_to_data.find(table_expression_node);
if (it == table_expression_node_to_data.end())
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Table expression {} data must be initialized. In scope {}",
table_expression_node->formatASTForErrorMessage(),
scope_node->formatASTForErrorMessage());
}
return it->second;
}
const AnalysisTableExpressionData & IdentifierResolveScope::getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) const
{
auto it = table_expression_node_to_data.find(table_expression_node);
if (it == table_expression_node_to_data.end())
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Table expression {} data must be initialized. In scope {}",
table_expression_node->formatASTForErrorMessage(),
scope_node->formatASTForErrorMessage());
}
return it->second;
}
void IdentifierResolveScope::pushExpressionNode(const QueryTreeNodePtr & node)
{
bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction();
expressions_in_resolve_process_stack.push(node);
if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction())
aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by;
}
void IdentifierResolveScope::popExpressionNode()
{
bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction();
expressions_in_resolve_process_stack.pop();
if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction())
aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_after_group_by;
}
/// Dump identifier resolve scope
[[maybe_unused]] void IdentifierResolveScope::dump(WriteBuffer & buffer) const
{
buffer << "Scope node " << scope_node->formatASTForErrorMessage() << '\n';
buffer << "Identifier lookup to resolve state " << identifier_lookup_to_resolve_state.size() << '\n';
for (const auto & [identifier, state] : identifier_lookup_to_resolve_state)
{
buffer << "Identifier " << identifier.dump() << " resolve result ";
state.resolve_result.dump(buffer);
buffer << '\n';
}
buffer << "Expression argument name to node " << expression_argument_name_to_node.size() << '\n';
for (const auto & [alias_name, node] : expression_argument_name_to_node)
buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n';
buffer << "Alias name to expression node table size " << aliases.alias_name_to_expression_node->size() << '\n';
for (const auto & [alias_name, node] : *aliases.alias_name_to_expression_node)
buffer << "Alias name " << alias_name << " expression node " << node->dumpTree() << '\n';
buffer << "Alias name to function node table size " << aliases.alias_name_to_lambda_node.size() << '\n';
for (const auto & [alias_name, node] : aliases.alias_name_to_lambda_node)
buffer << "Alias name " << alias_name << " lambda node " << node->formatASTForErrorMessage() << '\n';
buffer << "Alias name to table expression node table size " << aliases.alias_name_to_table_expression_node.size() << '\n';
for (const auto & [alias_name, node] : aliases.alias_name_to_table_expression_node)
buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n';
buffer << "CTE name to query node table size " << cte_name_to_query_node.size() << '\n';
for (const auto & [cte_name, node] : cte_name_to_query_node)
buffer << "CTE name " << cte_name << " node " << node->formatASTForErrorMessage() << '\n';
buffer << "WINDOW name to window node table size " << window_name_to_window_node.size() << '\n';
for (const auto & [window_name, node] : window_name_to_window_node)
buffer << "CTE name " << window_name << " node " << node->formatASTForErrorMessage() << '\n';
buffer << "Nodes with duplicated aliases size " << aliases.nodes_with_duplicated_aliases.size() << '\n';
for (const auto & node : aliases.nodes_with_duplicated_aliases)
buffer << "Alias name " << node->getAlias() << " node " << node->formatASTForErrorMessage() << '\n';
buffer << "Expression resolve process stack " << '\n';
expressions_in_resolve_process_stack.dump(buffer);
buffer << "Table expressions in resolve process size " << table_expressions_in_resolve_process.size() << '\n';
for (const auto & node : table_expressions_in_resolve_process)
buffer << "Table expression " << node->formatASTForErrorMessage() << '\n';
buffer << "Non cached identifier lookups during expression resolve " << non_cached_identifier_lookups_during_expression_resolve.size() << '\n';
for (const auto & identifier_lookup : non_cached_identifier_lookups_during_expression_resolve)
buffer << "Identifier lookup " << identifier_lookup.dump() << '\n';
buffer << "Table expression node to data " << table_expression_node_to_data.size() << '\n';
for (const auto & [table_expression_node, table_expression_data] : table_expression_node_to_data)
buffer << "Table expression node " << table_expression_node->formatASTForErrorMessage() << " data " << table_expression_data.dump() << '\n';
buffer << "Use identifier lookup to result cache " << use_identifier_lookup_to_result_cache << '\n';
buffer << "Subquery depth " << subquery_depth << '\n';
}
[[maybe_unused]] String IdentifierResolveScope::dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
}

View File

@ -0,0 +1,231 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Analyzer/HashUtils.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Resolve/IdentifierLookup.h>
#include <Analyzer/Resolve/ScopeAliases.h>
#include <Analyzer/Resolve/TableExpressionData.h>
#include <Analyzer/Resolve/ExpressionsStack.h>
namespace DB
{
/** A projection name is the name of a query tree node that is used in the projection part of a query node.
* Example: SELECT id FROM test_table;
* `id` is projection name of column node
*
* Example: SELECT id AS id_alias FROM test_table;
* `id_alias` is projection name of column node
*
* Calculation of projection names is done during expression node resolution. It is done this way
* because after an identifier node is resolved we lose information about the identifier name. We could
* potentially save this information in the query tree node itself, but that would require cloning it in some cases.
* Example: SELECT big_scalar_subquery AS a, a AS b, b AS c;
* All 3 nodes in projection are the same big_scalar_subquery, but they have different projection names.
* If we want to save it in query tree node, we have to clone subquery node that could lead to performance degradation.
*
* Possible solution is to separate query node metadata and query node content. So only node metadata could be cloned
* if we want to change projection name. This solution does not seem to be easy for client of query tree because projection
* name will be part of interface. If we potentially could hide projection names calculation in analyzer without introducing additional
* changes in query tree structure that would be preferable.
*
* Currently each resolve method returns a projection names array, and must compute the projection names of its node.
* If a node is resolved as a list node (the case for the `untuple` function or a `matcher`), the projection names array must contain projection names
* for the result nodes.
* If a node is not resolved as a list node, the projection names array contains a single projection name for the node.
*
* Rules for projection names:
* 1. If the node has an alias, that alias is the node's projection name,
* except in the scenario where the `untuple` function has an alias. Example: SELECT untuple(expr) AS alias, alias.
*
* 2. For a constant, it is the string representation of the constant value.
*
* 3. For identifier:
* If identifier is resolved from JOIN TREE, we want to remove additional identifier qualifications.
* Example: SELECT default.test_table.id FROM test_table.
* Result projection name is `id`.
*
* Example: SELECT t1.id FROM test_table_1 AS t1, test_table_2 AS t2
* In example both test_table_1, test_table_2 have `id` column.
* In such case projection name is `t1.id` because if additional qualification is removed then column projection name `id` will be ambiguous.
*
* Example: SELECT default.test_table_1.id FROM test_table_1 AS t1, test_table_2 AS t2
* In such case projection name is `test_table_1.id` because we remove unnecessary database qualification, but table name qualification cannot be removed
* because otherwise column projection name `id` will be ambiguous.
*
* If the identifier is not resolved from the JOIN TREE, the identifier name is the projection name,
* except in the scenario where the `untuple` function is resolved using an identifier. Example: SELECT untuple(expr) AS alias, alias.
* Example: SELECT sum(1, 1) AS value, value.
* In such case both nodes have `value` projection names.
*
* Example: SELECT id AS value, value FROM test_table.
* In such case both nodes have `value` projection names.
*
* Special case is `untuple` function. If `untuple` function specified with alias, then result nodes will have alias.tuple_column_name projection names.
* Example: SELECT cast(tuple(1), 'Tuple(id UInt64)') AS value, untuple(value) AS a;
* Result projection names are `value`, `a.id`.
*
* If the `untuple` function does not have an alias then the result nodes will have `tupleElement(untuple_expression_projection_name, 'tuple_column_name')` projection names.
*
* Example: SELECT cast(tuple(1), 'Tuple(id UInt64)') AS value, untuple(value);
* Result projection names are `value`, `tupleElement(value, 'id')`;
*
* 4. For function:
* The projection name consists of function_name(parameters_projection_names)(arguments_projection_names).
* Additionally, if the function is a window function, the window node projection name is used with the OVER clause.
* Example: function_name (parameters_names)(argument_projection_names) OVER window_name;
* Example: function_name (parameters_names)(argument_projection_names) OVER (PARTITION BY id ORDER BY id).
* Example: function_name (parameters_names)(argument_projection_names) OVER (window_name ORDER BY id).
*
* 5. For lambda:
* If it is a standalone lambda that returns a single expression, the function projection name is used.
* Example: WITH (x -> x + 1) AS lambda SELECT lambda(1).
* Projection name is `lambda(1)`.
*
* If it is a standalone lambda that returns a list, the projection names of the list nodes are used.
* Example: WITH (x -> *) AS lambda SELECT lambda(1) FROM test_table;
* If test_table has two columns `id`, `value`. Then result projection names are `id`, `value`.
*
* If the lambda is an argument of a function,
* then its projection name consists of lambda(tuple(lambda_arguments)(lambda_body_projection_name));
*
* 6. For matcher:
* Matched nodes projection names are used as matcher projection names.
*
* Matched nodes must be qualified if needed.
* Example: SELECT * FROM test_table_1 AS t1, test_table_2 AS t2.
* In example table test_table_1 and test_table_2 both have `id`, `value` columns.
* Matched nodes after unqualified matcher resolve must be qualified to avoid ambiguous projection names.
* Result projection names must be `t1.id`, `t1.value`, `t2.id`, `t2.value`.
*
* There are special cases
* 1. For lambda inside APPLY matcher transformer:
* Example: SELECT * APPLY x -> toString(x) FROM test_table.
* In such case lambda argument projection name `x` will be replaced by matched node projection name.
* If table has two columns `id` and `value`. Then result projection names are `toString(id)`, `toString(value)`;
*
* 2. For unqualified matcher when JOIN tree contains JOIN with USING.
* Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 USING(id);
* Result projection names must be `id`, `t1.value`, `t2.value`.
*
* 7. For subquery:
* For a subquery, the projection name consists of the `_subquery_` prefix and an implementation-specific unique number suffix.
* Example: SELECT (SELECT 1), (SELECT 1 UNION DISTINCT SELECT 1);
* Result projection names can be `_subquery_1`, `_subquery_2`;
*
* 8. For table:
* Table node can be used in expression context only as right argument of IN function. In that case identifier is used
* as table node projection name.
* Example: SELECT id IN test_table FROM test_table;
* Result projection name is `in(id, test_table)`.
*/
using ProjectionName = String;
using ProjectionNames = std::vector<ProjectionName>;
constexpr auto PROJECTION_NAME_PLACEHOLDER = "__projection_name_placeholder";
struct IdentifierResolveScope
{
/// Construct identifier resolve scope using scope node, and parent scope
IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_);
QueryTreeNodePtr scope_node;
IdentifierResolveScope * parent_scope = nullptr;
ContextPtr context;
/// Identifier lookup to result
std::unordered_map<IdentifierLookup, IdentifierResolveState, IdentifierLookupHash> identifier_lookup_to_resolve_state;
/// Argument can be expression like constant, column, function or table expression
std::unordered_map<std::string, QueryTreeNodePtr> expression_argument_name_to_node;
ScopeAliases aliases;
/// Table column name to column node. Valid only during table ALIAS columns resolve.
ColumnNameToColumnNodeMap column_name_to_column_node;
/// CTE name to query node
std::unordered_map<std::string, QueryTreeNodePtr> cte_name_to_query_node;
/// Window name to window node
std::unordered_map<std::string, QueryTreeNodePtr> window_name_to_window_node;
/// Current scope expression in resolve process stack
ExpressionsStack expressions_in_resolve_process_stack;
/// Table expressions in resolve process
std::unordered_set<const IQueryTreeNode *> table_expressions_in_resolve_process;
/// Current scope expression
std::unordered_set<IdentifierLookup, IdentifierLookupHash> non_cached_identifier_lookups_during_expression_resolve;
/// Table expression node to data
std::unordered_map<QueryTreeNodePtr, AnalysisTableExpressionData> table_expression_node_to_data;
QueryTreeNodePtrWithHashIgnoreTypesSet nullable_group_by_keys;
/// Here we count the number of nullable GROUP BY keys we met resolving expression.
/// E.g. for a query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) with cube`
/// both `number` and `tuple(number)` would be in nullable_group_by_keys.
/// But when we resolve `tuple(tuple(number))` we should figure out that `tuple(number)` is already a key,
/// and we should not convert `number` to nullable.
size_t found_nullable_group_by_key_in_scope = 0;
/** It's possible that after a JOIN, a column in the projection has a type different from the column in the source table.
* (For example, after join_use_nulls or a USING column cast to a supertype)
* However, the column in the projection still refers to the table as its source.
* This map is used to revert these columns back to their original columns in the source table.
*/
QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> join_columns_with_changed_types;
/// Use identifier lookup to result cache
bool use_identifier_lookup_to_result_cache = true;
/// Apply nullability to aggregation keys
bool group_by_use_nulls = false;
/// Join returns NULLs instead of default values
bool join_use_nulls = false;
/// JOINs count
size_t joins_count = 0;
/// Subquery depth
size_t subquery_depth = 0;
/** Scope join tree node for expression.
* Valid only during analysis construction for single expression.
*/
QueryTreeNodePtr expression_join_tree_node;
/// Node hash to mask id map
std::shared_ptr<std::map<IQueryTreeNode::Hash, size_t>> projection_mask_map;
struct ResolvedFunctionsCache
{
FunctionOverloadResolverPtr resolver;
FunctionBasePtr function_base;
};
std::map<IQueryTreeNode::Hash, ResolvedFunctionsCache> functions_cache;
[[maybe_unused]] const IdentifierResolveScope * getNearestQueryScope() const;
IdentifierResolveScope * getNearestQueryScope();
AnalysisTableExpressionData & getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node);
const AnalysisTableExpressionData & getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) const;
void pushExpressionNode(const QueryTreeNodePtr & node);
void popExpressionNode();
/// Dump identifier resolve scope
[[maybe_unused]] void dump(WriteBuffer & buffer) const;
[[maybe_unused]] String dump() const;
};
}

View File

@ -0,0 +1,22 @@
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Resolve/QueryAnalyzer.h>
#include <Analyzer/createUniqueTableAliases.h>
namespace DB
{
QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_)
: table_expression(std::move(table_expression_))
, only_analyze(only_analyze_)
{}
QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_analyze_) {}
void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
QueryAnalyzer analyzer(only_analyze);
analyzer.resolve(query_tree_node, table_expression, context);
createUniqueTableAliases(query_tree_node, table_expression, context);
}
}

View File

@ -0,0 +1,378 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Analyzer/HashUtils.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Resolve/IdentifierLookup.h>
#include <Core/Joins.h>
#include <Core/NamesAndTypes.h>
#include <Parsers/NullsAction.h>
namespace DB
{
struct GetColumnsOptions;
struct IdentifierResolveScope;
struct AnalysisTableExpressionData;
class QueryExpressionsAliasVisitor;
class QueryNode;
class JoinNode;
class ColumnNode;
using ProjectionName = String;
using ProjectionNames = std::vector<ProjectionName>;
struct Settings;
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
* And additional documentation for each method, where special cases are described in detail.
*
* Each node in the query must be resolved. The resolved state is specific to each kind of query tree node.
*
* For constant node no resolve process exists, it is resolved during construction.
*
* For table node no resolve process exists, it is resolved during construction.
*
* For a function node to be resolved, its parameters and arguments must be resolved, and the function node must be initialized with a concrete aggregate or
* non-aggregate function and with a result type.
*
* For lambda node there can be 2 different cases.
* 1. Standalone: WITH (x -> x + 1) AS lambda SELECT lambda(1); Such lambdas are inlined in query tree during query analysis pass.
* 2. Function arguments: WITH (x -> x + 1) AS lambda SELECT arrayMap(lambda, [1, 2, 3]); For such lambda resolution must
* set concrete lambda arguments (initially they are identifier nodes) and resolve lambda expression body.
*
* For query node resolve process must resolve all its inner nodes.
*
* For matcher node resolve process must replace it with matched nodes.
*
* For identifier node resolve process must replace it with concrete non identifier node. This part is most complex because
* for identifier resolution scopes and identifier lookup context play important part.
*
* ClickHouse SQL supports lexical scoping for identifier resolution. Scope can be defined by a query node or by an expression node.
* Expression nodes that can define scope are lambdas and table ALIAS columns.
*
* Identifier lookup context can be expression, function, table.
*
* Examples: WITH (x -> x + 1) as func SELECT func() FROM func; During function `func` resolution identifier lookup is performed
* in function context.
*
* If there is no information about the identifier context, the rules are as follows:
* 1. Try to resolve identifier in expression context.
* 2. Try to resolve identifier in function context, if it is allowed. Example: SELECT func(arguments); Here func identifier cannot be resolved in function context
* because query projection does not support that.
* 3. Try to resolve identifier in table context, if it is allowed. Example: SELECT table; Here table identifier cannot be resolved in function context
* because query projection does not support that.
*
* TODO: This was not supported properly before, because matchers could not be resolved from aliases.
*
* Identifiers are resolved with following rules:
* Resolution starts with current scope.
* 1. Try to resolve identifier from expression scope arguments. Lambda expression arguments have the greatest priority.
* 2. Try to resolve identifier from aliases.
* 3. Try to resolve identifier from join tree if scope is query, or if there are registered table columns in scope.
* The order of steps 2 and 3 can be changed using the prefer_column_name_to_alias setting.
* 4. If it is a table lookup, try to resolve the identifier from CTEs.
* If the identifier cannot be resolved in the current scope, resolution must continue in parent scopes.
* 5. Try to resolve the identifier from parent scopes.
*
* Additional rules about aliases and scopes.
* 1. A parent scope cannot refer to an alias from a child scope.
* 2. A child scope can refer to an alias in a parent scope.
*
* Example: SELECT arrayMap(x -> x + 1 AS a, [1,2,3]), a; Identifier a is unknown in parent scope.
* Example: SELECT a FROM (SELECT 1 as a); Here we do not refer to the alias a from the child query scope; we query its projection result, similar to tables.
* Example: WITH 1 as a SELECT (SELECT a) as b; Here in child scope identifier a is resolved using alias from parent scope.
*
* Additional rules about identifier binding.
* Binding an identifier to an entity means that the identifier's first part matches some node during analysis.
* If the other parts of the identifier cannot be resolved within that node, an exception must be thrown.
*
* Example:
* CREATE TABLE test_table (id UInt64, compound_value Tuple(value UInt64)) ENGINE=TinyLog;
* SELECT compound_value.value, 1 AS compound_value FROM test_table;
* The identifier's first part compound_value is bound to the entity with the alias compound_value, but the nested identifier part cannot be resolved from that entity,
* so lookup must not continue, and an exception must be thrown, because if lookup continued that way, the identifier could be resolved from the join tree.
*
* TODO: This was not supported properly before the analyzer, because a nested identifier could not be resolved from an alias.
*
* More complex example:
* CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog;
* WITH cast(('Value'), 'Tuple (value UInt64)') AS value SELECT (SELECT value FROM test_table);
* The identifier's first part value is bound to the test_table column value, but the nested identifier part cannot be resolved from it,
* so lookup must not continue, and an exception must be thrown, because if lookup continued, the identifier could be resolved from the parent scope.
*
* TODO: Update exception messages
* TODO: Table identifiers with optional UUID.
* TODO: Lookup functions arrayReduce(sum, [1, 2, 3]);
* TODO: Support function identifier resolve from parent query scope, if lambda in parent scope does not capture any columns.
*/
class QueryAnalyzer
{
public:
explicit QueryAnalyzer(bool only_analyze_);
~QueryAnalyzer();
void resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context);
private:
/// Utility functions
static bool isExpressionNodeType(QueryTreeNodeType node_type);
static bool isFunctionExpressionNodeType(QueryTreeNodeType node_type);
static bool isSubqueryNodeType(QueryTreeNodeType node_type);
static bool isTableExpressionNodeType(QueryTreeNodeType node_type);
static DataTypePtr getExpressionNodeResultTypeOrNull(const QueryTreeNodePtr & query_tree_node);
static ProjectionName calculateFunctionProjectionName(const QueryTreeNodePtr & function_node,
const ProjectionNames & parameters_projection_names,
const ProjectionNames & arguments_projection_names);
static ProjectionName calculateWindowProjectionName(const QueryTreeNodePtr & window_node,
const QueryTreeNodePtr & parent_window_node,
const String & parent_window_name,
const ProjectionNames & partition_by_projection_names,
const ProjectionNames & order_by_projection_names,
const ProjectionName & frame_begin_offset_projection_name,
const ProjectionName & frame_end_offset_projection_name);
static ProjectionName calculateSortColumnProjectionName(const QueryTreeNodePtr & sort_column_node,
const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name,
const ProjectionName & fill_to_expression_projection_name,
const ProjectionName & fill_step_expression_projection_name);
static void collectCompoundExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const DataTypePtr & compound_expression_type,
const Identifier & valid_identifier_prefix,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectTableExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const QueryTreeNodePtr & table_expression,
const AnalysisTableExpressionData & table_expression_data,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectScopeValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const IdentifierResolveScope & scope,
bool allow_expression_identifiers,
bool allow_function_identifiers,
bool allow_table_expression_identifiers,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectScopeWithParentScopesValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const IdentifierResolveScope & scope,
bool allow_expression_identifiers,
bool allow_function_identifiers,
bool allow_table_expression_identifiers,
std::unordered_set<Identifier> & valid_identifiers_result);
static std::vector<String> collectIdentifierTypoHints(const Identifier & unresolved_identifier, const std::unordered_set<Identifier> & valid_identifiers);
static QueryTreeNodePtr wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path);
QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context);
void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope);
static void mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope);
void replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope);
static void convertLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope);
static void validateTableExpressionModifiers(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
static void checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope);
static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);
static void expandGroupByAll(QueryNode & query_tree_node_typed);
void expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node);
static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
const QueryTreeNodePtr & resolved_identifier,
const JoinKind & join_kind,
std::optional<JoinTableSide> resolved_side,
IdentifierResolveScope & scope);
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
QueryTreeNodePtr tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
size_t identifier_bind_size,
const QueryTreeNodePtr & compound_expression,
String compound_expression_source,
IdentifierResolveScope & scope,
bool can_be_not_found = false);
QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
static bool tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromAliases(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope,
IdentifierResolveSettings identifier_resolve_settings);
QueryTreeNodePtr tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
static bool tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
static bool tryBindIdentifierToTableExpressions(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr matchArrayJoinSubcolumns(
const QueryTreeNodePtr & array_join_column_inner_expression,
const ColumnNode & array_join_column_expression_typed,
const QueryTreeNodePtr & resolved_expression,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & join_tree_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope);
IdentifierResolveResult tryResolveIdentifierInParentScopes(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
IdentifierResolveResult tryResolveIdentifier(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope,
IdentifierResolveSettings identifier_resolve_settings = {});
QueryTreeNodePtr tryResolveIdentifierFromStorage(
const Identifier & identifier,
const QueryTreeNodePtr & table_expression_node,
const AnalysisTableExpressionData & table_expression_data,
IdentifierResolveScope & scope,
size_t identifier_column_qualifier_parts,
bool can_be_not_found = false);
/// Resolve query tree nodes functions
void qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & column_nodes,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
static GetColumnsOptions buildGetColumnsOptions(QueryTreeNodePtr & matcher_node, const ContextPtr & context);
using QueryTreeNodesWithNames = std::vector<std::pair<QueryTreeNodePtr, std::string>>;
QueryTreeNodesWithNames getMatchedColumnNodesWithNames(const QueryTreeNodePtr & matcher_node,
const QueryTreeNodePtr & table_expression_node,
const NamesAndTypes & matched_columns,
const IdentifierResolveScope & scope);
void updateMatchedColumnsFromJoinUsing(QueryTreeNodesWithNames & result_matched_column_nodes_with_names, const QueryTreeNodePtr & source_table_expression, IdentifierResolveScope & scope);
QueryTreeNodesWithNames resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
QueryTreeNodesWithNames resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
ProjectionNames resolveMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
ProjectionName resolveWindow(QueryTreeNodePtr & window_node, IdentifierResolveScope & scope);
ProjectionNames resolveLambda(const QueryTreeNodePtr & lambda_node,
const QueryTreeNodePtr & lambda_node_to_resolve,
const QueryTreeNodes & lambda_arguments,
IdentifierResolveScope & scope);
ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope);
ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias = false);
ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression);
ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope);
void resolveGroupByNode(QueryNode & query_node_typed, IdentifierResolveScope & scope);
void resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpolate_node_list, IdentifierResolveScope & scope);
void resolveWindowNodeList(QueryTreeNodePtr & window_node_list, IdentifierResolveScope & scope);
NamesAndTypes resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope);
void initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope);
void initializeTableExpressionData(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
void resolveTableFunction(QueryTreeNodePtr & table_function_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor, bool nested_table_function);
void resolveArrayJoin(QueryTreeNodePtr & array_join_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor);
void resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor);
void resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor);
void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope);
void resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope);
/// Lambdas that are currently in resolve process
std::unordered_set<IQueryTreeNode *> lambdas_in_resolve_process;
/// CTEs that are currently in resolve process
std::unordered_set<std::string_view> ctes_in_resolve_process;
/// Function name to user defined lambda map
std::unordered_map<std::string, QueryTreeNodePtr> function_name_to_user_defined_lambda;
/// Array join expressions counter
size_t array_join_expressions_counter = 1;
/// Subquery counter
size_t subquery_counter = 1;
/// Global expression node to projection name map
std::unordered_map<QueryTreeNodePtr, ProjectionName> node_to_projection_name;
/// Global resolve expression node to projection names map
std::unordered_map<QueryTreeNodePtr, ProjectionNames> resolved_expressions;
/// Global resolve expression node to tree size
std::unordered_map<QueryTreeNodePtr, size_t> node_to_tree_size;
/// Global scalar subquery to scalar value map
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_local;
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_global;
const bool only_analyze;
};
}
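
To make the scoping rules above concrete, here is a minimal standalone sketch (hypothetical code, not part of the analyzer) of the parent-scope fallback: lookup starts in the current scope's alias map and walks upward, which is exactly why a parent scope can never see a child scope's aliases.

#include <optional>
#include <string>
#include <unordered_map>

struct Scope
{
    std::unordered_map<std::string, std::string> aliases; /// alias name -> resolved value
    const Scope * parent = nullptr;

    /// Mirrors resolution rule 5: try the current scope, then each parent scope in turn.
    std::optional<std::string> resolve(const std::string & name) const
    {
        for (const Scope * scope = this; scope != nullptr; scope = scope->parent)
        {
            auto it = scope->aliases.find(name);
            if (it != scope->aliases.end())
                return it->second;
        }
        return std::nullopt; /// unresolved; the real analyzer would throw here
    }
};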

View File

@ -0,0 +1,119 @@
#pragma once
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/Resolve/ScopeAliases.h>
#include <Analyzer/LambdaNode.h>
namespace DB
{
/** Visitor that extracts expression and function aliases from a node and initializes scope tables with them.
* Does not go into child lambdas and queries.
*
* Important:
* Identifier nodes with aliases are added to both the alias-to-expression and the alias-to-function map.
*
* This is necessary because an identifier with an alias can give its alias name to any query tree node.
*
* Example:
* WITH (x -> x + 1) AS id, id AS value SELECT value(1);
* In this example, id AS value is an identifier node that has an alias; during scope initialization we cannot derive
* whether id is actually a lambda or an expression.
*
* There is no easy solution here without trying to perform full-featured expression resolution at this stage.
* Example:
* WITH (x -> x + 1) AS id, id AS id_1, id_1 AS id_2 SELECT id_2(1);
* Example: SELECT a, b AS a, b AS c, 1 AS c;
*
* It is the client's responsibility, after resolving an identifier node with an alias, to take the following actions:
* 1. If identifier node was resolved in function scope, remove alias from scope expression map.
* 2. If identifier node was resolved in expression scope, remove alias from scope function map.
*
* That way we separate alias map initialization from expression resolution.
*/
class QueryExpressionsAliasVisitor : public InDepthQueryTreeVisitor<QueryExpressionsAliasVisitor>
{
public:
explicit QueryExpressionsAliasVisitor(ScopeAliases & aliases_)
: aliases(aliases_)
{}
void visitImpl(QueryTreeNodePtr & node)
{
updateAliasesIfNeeded(node, false /*is_lambda_node*/);
}
bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child)
{
if (auto * lambda_node = child->as<LambdaNode>())
{
updateAliasesIfNeeded(child, true /*is_lambda_node*/);
return false;
}
else if (auto * query_tree_node = child->as<QueryNode>())
{
if (query_tree_node->isCTE())
return false;
updateAliasesIfNeeded(child, false /*is_lambda_node*/);
return false;
}
else if (auto * union_node = child->as<UnionNode>())
{
if (union_node->isCTE())
return false;
updateAliasesIfNeeded(child, false /*is_lambda_node*/);
return false;
}
return true;
}
private:
void addDuplicatingAlias(const QueryTreeNodePtr & node)
{
aliases.nodes_with_duplicated_aliases.emplace(node);
auto cloned_node = node->clone();
aliases.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node);
aliases.nodes_with_duplicated_aliases.emplace(cloned_node);
}
void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node)
{
if (!node->hasAlias())
return;
// We should not resolve expressions to WindowNode
if (node->getNodeType() == QueryTreeNodeType::WINDOW)
return;
const auto & alias = node->getAlias();
if (is_lambda_node)
{
if (aliases.alias_name_to_expression_node->contains(alias))
addDuplicatingAlias(node);
auto [_, inserted] = aliases.alias_name_to_lambda_node.insert(std::make_pair(alias, node));
if (!inserted)
addDuplicatingAlias(node);
return;
}
if (aliases.alias_name_to_lambda_node.contains(alias))
addDuplicatingAlias(node);
auto [_, inserted] = aliases.alias_name_to_expression_node->insert(std::make_pair(alias, node));
if (!inserted)
addDuplicatingAlias(node);
/// If node is identifier put it into transitive aliases map.
if (const auto * identifier = typeid_cast<const IdentifierNode *>(node.get()))
aliases.transitive_aliases.insert(std::make_pair(alias, identifier->getIdentifier()));
}
ScopeAliases & aliases;
};
}
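
A hedged usage sketch of this visitor (projection_list_node is a hypothetical, already-built query tree node; alias_name_to_expression_node must point at one of the two expression maps before visiting, which IdentifierResolveScope normally takes care of):

ScopeAliases aliases;
aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by;

QueryExpressionsAliasVisitor visitor(aliases);
visitor.visit(projection_list_node); /// fills the expression and lambda alias maps, recording duplicates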

View File

@ -0,0 +1,91 @@
#pragma once
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Resolve/IdentifierLookup.h>
namespace DB
{
struct ScopeAliases
{
/// Alias name to query expression node
std::unordered_map<std::string, QueryTreeNodePtr> alias_name_to_expression_node_before_group_by;
std::unordered_map<std::string, QueryTreeNodePtr> alias_name_to_expression_node_after_group_by;
std::unordered_map<std::string, QueryTreeNodePtr> * alias_name_to_expression_node = nullptr;
/// Alias name to lambda node
std::unordered_map<std::string, QueryTreeNodePtr> alias_name_to_lambda_node;
/// Alias name to table expression node
std::unordered_map<std::string, QueryTreeNodePtr> alias_name_to_table_expression_node;
/// Expressions like `x as y` where we can't say whether it's a function, expression or table.
std::unordered_map<std::string, Identifier> transitive_aliases;
/// Nodes with duplicated aliases
std::unordered_set<QueryTreeNodePtr> nodes_with_duplicated_aliases;
std::vector<QueryTreeNodePtr> cloned_nodes_with_duplicated_aliases;
/// Names which are aliases from ARRAY JOIN.
/// This is needed to properly qualify columns from matchers and avoid name collision.
std::unordered_set<std::string> array_join_aliases;
std::unordered_map<std::string, QueryTreeNodePtr> & getAliasMap(IdentifierLookupContext lookup_context)
{
switch (lookup_context)
{
case IdentifierLookupContext::EXPRESSION: return *alias_name_to_expression_node;
case IdentifierLookupContext::FUNCTION: return alias_name_to_lambda_node;
case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node;
}
}
enum class FindOption
{
FIRST_NAME,
FULL_NAME,
};
const std::string & getKey(const Identifier & identifier, FindOption find_option)
{
switch (find_option)
{
case FindOption::FIRST_NAME: return identifier.front();
case FindOption::FULL_NAME: return identifier.getFullName();
}
}
QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option)
{
auto & alias_map = getAliasMap(lookup.lookup_context);
const std::string * key = &getKey(lookup.identifier, find_option);
auto it = alias_map.find(*key);
if (it != alias_map.end())
return &it->second;
if (lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION)
return {};
while (it == alias_map.end())
{
auto jt = transitive_aliases.find(*key);
if (jt == transitive_aliases.end())
return {};
key = &(getKey(jt->second, find_option));
it = alias_map.find(*key);
}
return &it->second;
}
const QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) const
{
return const_cast<ScopeAliases *>(this)->find(lookup, find_option);
}
};
}
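
The chain-following in find() deserves a worked example. For WITH (x -> x + 1) AS id, id AS id_1, id_1 AS id_2 SELECT id_2(1), a FUNCTION lookup of id_2 misses alias_name_to_lambda_node, hops id_2 -> id_1 -> id through transitive_aliases, and finally hits id. A standalone model of that loop (plain strings standing in for query tree nodes):

#include <string>
#include <unordered_map>

const std::string * findThroughTransitiveAliases(
    const std::unordered_map<std::string, std::string> & alias_map,
    const std::unordered_map<std::string, std::string> & transitive_aliases,
    std::string key)
{
    auto it = alias_map.find(key);
    while (it == alias_map.end())
    {
        auto jt = transitive_aliases.find(key);
        if (jt == transitive_aliases.end())
            return nullptr; /// chain dead-ends: the alias is not resolvable here
        key = jt->second;  /// hop one transitive alias, e.g. id_2 -> id_1
        it = alias_map.find(key);
    }
    return &it->second;
}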

View File

@ -0,0 +1,83 @@
#pragma once
#include <IO/Operators.h>
#include <Analyzer/ColumnNode.h>
namespace DB
{
struct StringTransparentHash
{
using is_transparent = void;
using hash = std::hash<std::string_view>;
[[maybe_unused]] size_t operator()(const char * data) const
{
return hash()(data);
}
size_t operator()(std::string_view data) const
{
return hash()(data);
}
size_t operator()(const std::string & data) const
{
return hash()(data);
}
};
using ColumnNameToColumnNodeMap = std::unordered_map<std::string, ColumnNodePtr, StringTransparentHash, std::equal_to<>>;
struct AnalysisTableExpressionData
{
std::string table_expression_name;
std::string table_expression_description;
std::string database_name;
std::string table_name;
bool should_qualify_columns = true;
NamesAndTypes column_names_and_types;
ColumnNameToColumnNodeMap column_name_to_column_node;
std::unordered_set<std::string> subcolumn_names; /// Subset of columns that are subcolumns of other columns
std::unordered_set<std::string, StringTransparentHash, std::equal_to<>> column_identifier_first_parts;
bool hasFullIdentifierName(IdentifierView identifier_view) const
{
return column_name_to_column_node.contains(identifier_view.getFullName());
}
bool canBindIdentifier(IdentifierView identifier_view) const
{
return column_identifier_first_parts.contains(identifier_view.at(0));
}
[[maybe_unused]] void dump(WriteBuffer & buffer) const
{
buffer << "Table expression name " << table_expression_name;
if (!table_expression_description.empty())
buffer << " table expression description " << table_expression_description;
if (!database_name.empty())
buffer << " database name " << database_name;
if (!table_name.empty())
buffer << " table name " << table_name;
buffer << " should qualify columns " << should_qualify_columns;
buffer << " columns size " << column_name_to_column_node.size() << '\n';
for (const auto & [column_name, column_node] : column_name_to_column_node)
buffer << "Column name " << column_name << " column node " << column_node->dumpTree() << '\n';
}
[[maybe_unused]] String dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
};
}
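
The reason StringTransparentHash declares is_transparent: combined with std::equal_to<>, it enables C++20 heterogeneous lookup, so maps like column_name_to_column_node can be probed with a std::string_view or const char * without allocating a temporary std::string per lookup. A self-contained illustration:

#include <functional>
#include <string>
#include <string_view>
#include <unordered_map>

struct TransparentHash
{
    using is_transparent = void; /// opt in to heterogeneous lookup
    size_t operator()(std::string_view s) const { return std::hash<std::string_view>()(s); }
};

using ColumnMap = std::unordered_map<std::string, int, TransparentHash, std::equal_to<>>;

bool hasColumn(const ColumnMap & columns, std::string_view full_name)
{
    return columns.find(full_name) != columns.end(); /// no temporary std::string is built
}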

View File

@ -0,0 +1,71 @@
#pragma once
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/Resolve/IdentifierResolveScope.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/JoinNode.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
}
class TableExpressionsAliasVisitor : public InDepthQueryTreeVisitor<TableExpressionsAliasVisitor>
{
public:
explicit TableExpressionsAliasVisitor(IdentifierResolveScope & scope_)
: scope(scope_)
{}
void visitImpl(QueryTreeNodePtr & node)
{
updateAliasesIfNeeded(node);
}
static bool needChildVisit(const QueryTreeNodePtr & node, const QueryTreeNodePtr & child)
{
auto node_type = node->getNodeType();
switch (node_type)
{
case QueryTreeNodeType::ARRAY_JOIN:
{
const auto & array_join_node = node->as<const ArrayJoinNode &>();
return child.get() == array_join_node.getTableExpression().get();
}
case QueryTreeNodeType::JOIN:
{
const auto & join_node = node->as<const JoinNode &>();
return child.get() == join_node.getLeftTableExpression().get() || child.get() == join_node.getRightTableExpression().get();
}
default:
{
break;
}
}
return false;
}
private:
void updateAliasesIfNeeded(const QueryTreeNodePtr & node)
{
if (!node->hasAlias())
return;
const auto & node_alias = node->getAlias();
auto [_, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(node_alias, node);
if (!inserted)
throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS,
"Multiple table expressions with same alias {}. In scope {}",
node_alias,
scope.scope_node->formatASTForErrorMessage());
}
IdentifierResolveScope & scope;
};
}

View File

@ -188,6 +188,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
fs::path(s3_uri.key) / path_in_backup,
0,
file_size,
/* dest_s3_client= */ destination_disk->getS3StorageClient(),
/* dest_bucket= */ blob_path[1],
/* dest_key= */ blob_path[0],
s3_settings.request_settings,
@ -252,18 +253,20 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src
{
LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName());
copyS3File(
client,
src_disk->getS3StorageClient(),
/* src_bucket */ blob_path[1],
/* src_key= */ blob_path[0],
start_pos,
length,
s3_uri.bucket,
fs::path(s3_uri.key) / path_in_backup,
/* dest_s3_client= */ client,
/* dest_bucket= */ s3_uri.bucket,
/* dest_key= */ fs::path(s3_uri.key) / path_in_backup,
s3_settings.request_settings,
read_settings,
blob_storage_log,
{},
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"),
/*for_disk_s3=*/false);
return; /// copied!
}
}
@ -281,8 +284,9 @@ void BackupWriterS3::copyFile(const String & destination, const String & source,
/* src_key= */ fs::path(s3_uri.key) / source,
0,
size,
s3_uri.bucket,
fs::path(s3_uri.key) / destination,
/* dest_s3_client= */ client,
/* dest_bucket= */ s3_uri.bucket,
/* dest_key= */ fs::path(s3_uri.key) / destination,
s3_settings.request_settings,
read_settings,
blob_storage_log,

View File

@ -14,8 +14,8 @@ namespace DB
class CatBoostLibraryBridgeHelper final : public LibraryBridgeHelper
{
public:
static constexpr inline auto PING_HANDLER = "/catboost_ping";
static constexpr inline auto MAIN_HANDLER = "/catboost_request";
static constexpr auto PING_HANDLER = "/catboost_ping";
static constexpr auto MAIN_HANDLER = "/catboost_request";
explicit CatBoostLibraryBridgeHelper(
ContextPtr context_,
@ -38,11 +38,11 @@ protected:
bool bridgeHandShake() override;
private:
static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list";
static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel";
static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels";
static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount";
static constexpr inline auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate";
static constexpr auto CATBOOST_LIST_METHOD = "catboost_list";
static constexpr auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel";
static constexpr auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels";
static constexpr auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount";
static constexpr auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate";
Poco::URI createRequestURI(const String & method) const;

View File

@ -25,8 +25,8 @@ public:
String dict_attributes;
};
static constexpr inline auto PING_HANDLER = "/extdict_ping";
static constexpr inline auto MAIN_HANDLER = "/extdict_request";
static constexpr auto PING_HANDLER = "/extdict_ping";
static constexpr auto MAIN_HANDLER = "/extdict_request";
ExternalDictionaryLibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_);
@ -62,14 +62,14 @@ protected:
ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const;
private:
static constexpr inline auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew";
static constexpr inline auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone";
static constexpr inline auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete";
static constexpr inline auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll";
static constexpr inline auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds";
static constexpr inline auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys";
static constexpr inline auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified";
static constexpr inline auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad";
static constexpr auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew";
static constexpr auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone";
static constexpr auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete";
static constexpr auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll";
static constexpr auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds";
static constexpr auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys";
static constexpr auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified";
static constexpr auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad";
Poco::URI createRequestURI(const String & method) const;

View File

@ -16,9 +16,9 @@ class IBridgeHelper: protected WithContext
{
public:
static constexpr inline auto DEFAULT_HOST = "127.0.0.1";
static constexpr inline auto DEFAULT_FORMAT = "RowBinary";
static constexpr inline auto PING_OK_ANSWER = "Ok.";
static constexpr auto DEFAULT_HOST = "127.0.0.1";
static constexpr auto DEFAULT_FORMAT = "RowBinary";
static constexpr auto PING_OK_ANSWER = "Ok.";
static const inline std::string PING_METHOD = Poco::Net::HTTPRequest::HTTP_GET;
static const inline std::string MAIN_METHOD = Poco::Net::HTTPRequest::HTTP_POST;

View File

@ -37,7 +37,7 @@ protected:
Poco::URI createBaseURI() const override;
static constexpr inline size_t DEFAULT_PORT = 9012;
static constexpr size_t DEFAULT_PORT = 9012;
const Poco::Util::AbstractConfiguration & config;
LoggerPtr log;

View File

@ -52,12 +52,12 @@ class XDBCBridgeHelper : public IXDBCBridgeHelper
{
public:
static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT;
static constexpr inline auto PING_HANDLER = "/ping";
static constexpr inline auto MAIN_HANDLER = "/";
static constexpr inline auto COL_INFO_HANDLER = "/columns_info";
static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote";
static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed";
static constexpr auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT;
static constexpr auto PING_HANDLER = "/ping";
static constexpr auto MAIN_HANDLER = "/";
static constexpr auto COL_INFO_HANDLER = "/columns_info";
static constexpr auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote";
static constexpr auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed";
XDBCBridgeHelper(
ContextPtr context_,
@ -256,7 +256,7 @@ protected:
struct JDBCBridgeMixin
{
static constexpr inline auto DEFAULT_PORT = 9019;
static constexpr auto DEFAULT_PORT = 9019;
static String configPrefix()
{
@ -287,7 +287,7 @@ struct JDBCBridgeMixin
struct ODBCBridgeMixin
{
static constexpr inline auto DEFAULT_PORT = 9018;
static constexpr auto DEFAULT_PORT = 9018;
static String configPrefix()
{

View File

@ -215,6 +215,7 @@ add_object_library(clickhouse_databases_mysql Databases/MySQL)
add_object_library(clickhouse_disks Disks)
add_object_library(clickhouse_analyzer Analyzer)
add_object_library(clickhouse_analyzer_passes Analyzer/Passes)
add_object_library(clickhouse_analyzer_passes Analyzer/Resolve)
add_object_library(clickhouse_planner Planner)
add_object_library(clickhouse_interpreters Interpreters)
add_object_library(clickhouse_interpreters_cache Interpreters/Cache)

View File

@ -8,7 +8,6 @@
#include <Common/SipHash.h>
#include <Common/WeakHash.h>
#include <Common/iota.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <algorithm>
#include <bit>
@ -323,7 +322,9 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const
size_t res_offset = 0;
auto offset_it = begin();
for (size_t i = 0; i < _size; ++i, ++offset_it)
/// Replace `++offset_it` with `offset_it.increaseCurrentRow()` and `offset_it.increaseCurrentOffset()`
/// to remove the redundant `isDefault()` check inside `operator++` of `Iterator` and reuse the `isDefault()` result computed below.
for (size_t i = 0; i < _size; ++i, offset_it.increaseCurrentRow())
{
if (!offset_it.isDefault())
{
@ -338,6 +339,7 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const
{
values_filter.push_back(0);
}
offset_it.increaseCurrentOffset();
}
else
{

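A self-contained model of the loop change above (offsets lists the rows that hold non-default values, as in ColumnSparse): the row counter advances unconditionally while the offset counter advances only in the non-default branch the loop body has already taken, so isDefault() is evaluated once per row instead of twice.

#include <cstddef>
#include <vector>

size_t countNonDefault(const std::vector<size_t> & offsets, size_t size)
{
    size_t non_default = 0;
    size_t current_offset = 0; /// index into offsets
    for (size_t row = 0; row < size; ++row) /// ++row plays the role of increaseCurrentRow()
    {
        bool is_default = current_offset >= offsets.size() || offsets[current_offset] != row;
        if (!is_default)
        {
            ++non_default;
            ++current_offset; /// increaseCurrentOffset(): only after a non-default row
        }
    }
    return non_default;
}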
View File

@ -189,6 +189,8 @@ public:
size_t ALWAYS_INLINE getValueIndex() const { return isDefault() ? 0 : current_offset + 1; }
size_t ALWAYS_INLINE getCurrentRow() const { return current_row; }
size_t ALWAYS_INLINE getCurrentOffset() const { return current_offset; }
size_t ALWAYS_INLINE increaseCurrentRow() { return ++current_row; }
size_t ALWAYS_INLINE increaseCurrentOffset() { return ++current_offset; }
bool operator==(const Iterator & other) const
{

View File

@ -69,9 +69,9 @@ union CPUInfo
UInt32 edx;
} registers;
inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
};
inline bool haveRDTSCP() noexcept

View File

@ -453,7 +453,7 @@ protected:
/// Return the columns which actually contain the values of the keys.
/// For a given key column, if it is nullable, we return its nested
/// column. Otherwise we return the key column itself.
inline const ColumnRawPtrs & getActualColumns() const
const ColumnRawPtrs & getActualColumns() const
{
return actual_columns;
}

View File

@ -292,13 +292,13 @@ private:
}
template <typename T>
inline T & getContainer()
T & getContainer()
{
return *reinterpret_cast<T *>(address & mask);
}
template <typename T>
inline const T & getContainer() const
const T & getContainer() const
{
return *reinterpret_cast<T *>(address & mask);
}
@ -309,7 +309,7 @@ private:
address |= static_cast<UInt8>(t);
}
inline details::ContainerType getContainerType() const
details::ContainerType getContainerType() const
{
return static_cast<details::ContainerType>(address & ~mask);
}

View File

@ -116,7 +116,7 @@ public:
/** Return the current cell number and the corresponding content.
*/
inline std::pair<BucketIndex, UInt8> get() const
std::pair<BucketIndex, UInt8> get() const
{
if ((current_bucket_index == 0) || is_eof)
throw Exception(ErrorCodes::NO_AVAILABLE_DATA, "No available data.");

View File

@ -37,7 +37,7 @@ namespace fs = std::filesystem;
class CounterInFile
{
private:
static inline constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16;
static constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16;
public:
/// path - the name of the file, including the path

View File

@ -174,6 +174,11 @@
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
\
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
M(DiskPlainRewritableS3DirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for S3ObjectStorage.") \
\
M(MergeTreePartsLoaderThreads, "Number of threads in the MergeTree parts loader thread pool.") \
M(MergeTreePartsLoaderThreadsActive, "Number of threads in the MergeTree parts loader thread pool running a task.") \
M(MergeTreePartsLoaderThreadsScheduled, "Number of queued or active jobs in the MergeTree parts loader thread pool.") \

View File

@ -64,7 +64,7 @@ public:
static ProfileEvents::Counters & getProfileEvents();
inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker()
{
if (unlikely(!current_thread))
if (!current_thread) [[unlikely]]
return nullptr;
return &current_thread->memory_tracker;
}
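
The change above swaps the project's unlikely() macro for the standard C++20 [[unlikely]] attribute: the branch-layout hint is the same, but it is spelled on the statement rather than wrapped around the condition. A minimal illustration:

int dereferenceOrZero(const int * ptr)
{
    if (ptr == nullptr) [[unlikely]] /// hint to the compiler: the null path is cold
        return 0;
    return *ptr;
}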

View File

@ -115,6 +115,12 @@ class FixedHashTable : private boost::noncopyable, protected Allocator, protecte
{
static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
/// We maintain the min and max values inserted into the hash table so that traversal can be limited to the [min, max] range.
/// Both values can be maintained efficiently only within `emplace` calls (and not when we populate the hash table in the `read` method, for example), so we update them only within `emplace` and track whether any other method was used to insert data.
bool only_emplace_was_used_to_insert_data = true;
size_t min = NUM_CELLS - 1;
size_t max = 0;
protected:
friend class const_iterator;
friend class iterator;
@ -170,6 +176,8 @@ protected:
/// Skip empty cells in the main buffer.
const auto * buf_end = container->buf + container->NUM_CELLS;
if (container->canUseMinMaxOptimization())
buf_end = container->buf + container->max + 1;
while (ptr < buf_end && ptr->isZero(*container))
++ptr;
@ -261,7 +269,7 @@ public:
return true;
}
inline const value_type & get() const
const value_type & get() const
{
if (!is_initialized || is_eof)
throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data");
@ -297,12 +305,7 @@ public:
if (!buf)
return end();
const Cell * ptr = buf;
auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
return const_iterator(this, ptr);
return const_iterator(this, firstPopulatedCell());
}
const_iterator cbegin() const { return begin(); }
@ -312,18 +315,13 @@ public:
if (!buf)
return end();
Cell * ptr = buf;
auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
return iterator(this, ptr);
return iterator(this, const_cast<Cell *>(firstPopulatedCell()));
}
const_iterator end() const
{
/// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C.
return const_iterator(this, buf ? buf + NUM_CELLS : buf);
return const_iterator(this, buf ? lastPopulatedCell() : buf);
}
const_iterator cend() const
@ -333,7 +331,7 @@ public:
iterator end()
{
return iterator(this, buf ? buf + NUM_CELLS : buf);
return iterator(this, buf ? lastPopulatedCell() : buf);
}
@ -350,6 +348,8 @@ public:
new (&buf[x]) Cell(x, *this);
inserted = true;
if (x < min) min = x;
if (x > max) max = x;
this->increaseSize();
}
@ -377,6 +377,26 @@ public:
bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
/// Decide whether the min/max optimization can be used. `max < min` means the FixedHashTable is empty. The flag `only_emplace_was_used_to_insert_data`
/// tracks whether the FixedHashTable only used `emplace()` to insert data, since min/max are maintained only there.
bool ALWAYS_INLINE canUseMinMaxOptimization() const { return ((max >= min) && only_emplace_was_used_to_insert_data); }
const Cell * ALWAYS_INLINE firstPopulatedCell() const
{
const Cell * ptr = buf;
if (!canUseMinMaxOptimization())
{
while (ptr < buf + NUM_CELLS && ptr->isZero(*this))
++ptr;
}
else
ptr = buf + min;
return ptr;
}
Cell * ALWAYS_INLINE lastPopulatedCell() const { return canUseMinMaxOptimization() ? buf + max + 1 : buf + NUM_CELLS; }
void write(DB::WriteBuffer & wb) const
{
Cell::State::write(wb);
@ -433,6 +453,7 @@ public:
x.read(rb);
new (&buf[place_value]) Cell(x, *this);
}
only_emplace_was_used_to_insert_data = false;
}
void readText(DB::ReadBuffer & rb)
@ -455,6 +476,7 @@ public:
x.readText(rb);
new (&buf[place_value]) Cell(x, *this);
}
only_emplace_was_used_to_insert_data = false;
}
size_t size() const { return this->getSize(buf, *this, NUM_CELLS); }
@ -493,7 +515,11 @@ public:
}
const Cell * data() const { return buf; }
Cell * data() { return buf; }
Cell * data()
{
only_emplace_was_used_to_insert_data = false;
return buf;
}
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
size_t getCollisions() const { return 0; }

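A compact model of the new bookkeeping (a fixed table over all 256 possible UInt8 keys, bool cells standing in for Cell): emplace maintains the occupied [min, max] range so iteration can skip the empty tails, and any insertion path that bypasses emplace (read(), readText(), mutable data()) clears the flag and falls back to scanning every cell.

#include <array>
#include <cstddef>

struct FixedSet256
{
    std::array<bool, 256> occupied{};
    size_t min = 255;
    size_t max = 0;
    bool only_emplace_was_used_to_insert_data = true;

    void emplace(unsigned char key)
    {
        occupied[key] = true;
        if (key < min) min = key;
        if (key > max) max = key;
    }

    size_t countOccupied() const
    {
        /// max < min means nothing was emplaced; the flag guards against other insert paths
        bool can_use_min_max = max >= min && only_emplace_was_used_to_insert_data;
        size_t begin = can_use_min_max ? min : 0;
        size_t end = can_use_min_max ? max + 1 : occupied.size();
        size_t count = 0;
        for (size_t i = begin; i < end; ++i)
            count += occupied[i];
        return count;
    }
};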
View File

@ -844,7 +844,7 @@ public:
return true;
}
inline const value_type & get() const
const value_type & get() const
{
if (!is_initialized || is_eof)
throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data");

View File

@ -69,7 +69,7 @@ struct PackedHashMapCell : public HashMapCell<Key, TMapped, Hash, TState, Packed
bool isZero(const State & state) const { return isZero(this->value.first, state); }
static bool isZero(const Key key, const State & /*state*/) { return ZeroTraits::check(key); }
static inline bool bitEqualsByValue(key_type a, key_type b) { return a == b; }
static bool bitEqualsByValue(key_type a, key_type b) { return a == b; }
template <size_t I>
auto get() const

View File

@ -112,7 +112,7 @@ public:
return true;
}
inline const value_type & get() const
const value_type & get() const
{
if (!is_initialized || is_eof)
throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data");

View File

@ -128,13 +128,13 @@ public:
{
}
inline void update(UInt8 cur_rank, UInt8 new_rank)
void update(UInt8 cur_rank, UInt8 new_rank)
{
denominator -= static_cast<T>(1.0) / (1ULL << cur_rank);
denominator += static_cast<T>(1.0) / (1ULL << new_rank);
}
inline void update(UInt8 rank)
void update(UInt8 rank)
{
denominator += static_cast<T>(1.0) / (1ULL << rank);
}
@ -166,13 +166,13 @@ public:
rank_count[0] = static_cast<UInt32>(initial_value);
}
inline void update(UInt8 cur_rank, UInt8 new_rank)
void update(UInt8 cur_rank, UInt8 new_rank)
{
--rank_count[cur_rank];
++rank_count[new_rank];
}
inline void update(UInt8 rank)
void update(UInt8 rank)
{
++rank_count[rank];
}
@ -429,13 +429,13 @@ public:
private:
/// Extract subset of bits in [begin, end[ range.
inline HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const
HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const
{
return (val >> begin) & ((1ULL << (end - begin)) - 1);
}
/// Rank is number of trailing zeros.
inline UInt8 calculateRank(HashValueType val) const
UInt8 calculateRank(HashValueType val) const
{
if (unlikely(val == 0))
return max_rank;
@ -448,7 +448,7 @@ private:
return zeros_plus_one;
}
inline HashValueType getHash(Value key) const
HashValueType getHash(Value key) const
{
/// NOTE: this should be OK, since value is the same as key for HLL.
return static_cast<HashValueType>(
@ -496,7 +496,7 @@ private:
throw Poco::Exception("Internal error", DB::ErrorCodes::LOGICAL_ERROR);
}
inline double applyCorrection(double raw_estimate) const
double applyCorrection(double raw_estimate) const
{
double fixed_estimate;
@ -525,7 +525,7 @@ private:
/// Correction used in HyperLogLog++ algorithm.
/// Source: "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm"
/// (S. Heule et al., Proceedings of the EDBT 2013 Conference).
inline double applyBiasCorrection(double raw_estimate) const
double applyBiasCorrection(double raw_estimate) const
{
double fixed_estimate;
@ -540,7 +540,7 @@ private:
/// Calculation of unique values using LinearCounting algorithm.
/// Source: "A Linear-time Probabilistic Counting Algorithm for Database Applications"
/// (Whang et al., ACM Trans. Database Syst., pp. 208-229, 1990).
inline double applyLinearCorrection(double raw_estimate) const
double applyLinearCorrection(double raw_estimate) const
{
double fixed_estimate;

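Two pieces of the estimator above, restated as a standalone sketch (the '+ 1' follows the zeros_plus_one variable in calculateRank): a hash's rank is its number of trailing zeros plus one, capped at max_rank, and the denominator that the update() methods maintain is the running sum of 2^-rank feeding the harmonic-mean estimate.

#include <bit>
#include <cstdint>

uint8_t calculateRank(uint64_t hash_bits, uint8_t max_rank)
{
    if (hash_bits == 0) /// no one-bit at all: clamp to the maximum rank
        return max_rank;
    auto zeros_plus_one = static_cast<uint8_t>(std::countr_zero(hash_bits) + 1);
    return zeros_plus_one > max_rank ? max_rank : zeros_plus_one;
}

double updateDenominator(double denominator, uint8_t cur_rank, uint8_t new_rank)
{
    /// mirrors update(cur_rank, new_rank): swap one bucket's 2^-rank contribution
    denominator -= 1.0 / static_cast<double>(1ULL << cur_rank);
    denominator += 1.0 / static_cast<double>(1ULL << new_rank);
    return denominator;
}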
View File

@ -23,7 +23,7 @@ struct Interval
Interval(IntervalStorageType left_, IntervalStorageType right_) : left(left_), right(right_) { }
inline bool contains(IntervalStorageType point) const { return left <= point && point <= right; }
bool contains(IntervalStorageType point) const { return left <= point && point <= right; }
};
template <typename IntervalStorageType>
@ -290,7 +290,7 @@ private:
IntervalStorageType middle_element;
inline bool hasValue() const { return sorted_intervals_range_size != 0; }
bool hasValue() const { return sorted_intervals_range_size != 0; }
};
using IntervalWithEmptyValue = Interval;
@ -585,7 +585,7 @@ private:
}
}
inline size_t findFirstIteratorNodeIndex() const
size_t findFirstIteratorNodeIndex() const
{
size_t nodes_size = nodes.size();
size_t result_index = 0;
@ -602,7 +602,7 @@ private:
return result_index;
}
inline size_t findLastIteratorNodeIndex() const
size_t findLastIteratorNodeIndex() const
{
if (unlikely(nodes.empty()))
return 0;
@ -618,7 +618,7 @@ private:
return result_index;
}
inline void increaseIntervalsSize()
void increaseIntervalsSize()
{
/// Before tree is build we store all intervals size in our first node to allow tree iteration.
++intervals_size;
@ -630,7 +630,7 @@ private:
size_t intervals_size = 0;
bool tree_is_built = false;
static inline const Interval & getInterval(const IntervalWithValue & interval_with_value)
static const Interval & getInterval(const IntervalWithValue & interval_with_value)
{
if constexpr (is_empty_value)
return interval_with_value;
@ -639,7 +639,7 @@ private:
}
template <typename IntervalCallback>
static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback)
static bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback)
{
if constexpr (is_empty_value)
return callback(interval);
@ -647,7 +647,7 @@ private:
return callback(interval.first, interval.second);
}
static inline void
static void
intervalsToPoints(const std::vector<IntervalWithValue> & intervals, std::vector<IntervalStorageType> & temporary_points_storage)
{
for (const auto & interval_with_value : intervals)
@ -658,7 +658,7 @@ private:
}
}
static inline IntervalStorageType pointsMedian(std::vector<IntervalStorageType> & points)
static IntervalStorageType pointsMedian(std::vector<IntervalStorageType> & points)
{
size_t size = points.size();
size_t middle_element_index = size / 2;

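pointsMedian() does not need a full sort to split a node: std::nth_element places just the middle endpoint in O(n) on average. A sketch of a plausible implementation (concrete value type assumed):

#include <algorithm>
#include <vector>

long pointsMedian(std::vector<long> & points)
{
    size_t middle_element_index = points.size() / 2;
    /// partial ordering only: elements before the middle are <= it, elements after are >= it
    std::nth_element(points.begin(), points.begin() + middle_element_index, points.end());
    return points[middle_element_index];
}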
View File

@ -26,62 +26,62 @@ class SimdJSONBasicFormatter
{
public:
explicit SimdJSONBasicFormatter(PaddedPODArray<UInt8> & buffer_) : buffer(buffer_) {}
inline void comma() { oneChar(','); }
void comma() { oneChar(','); }
/** Start an array, prints [ **/
inline void startArray() { oneChar('['); }
void startArray() { oneChar('['); }
/** End an array, prints ] **/
inline void endArray() { oneChar(']'); }
void endArray() { oneChar(']'); }
/** Start an object, prints { **/
inline void startObject() { oneChar('{'); }
void startObject() { oneChar('{'); }
/** End an object, prints } **/
inline void endObject() { oneChar('}'); }
void endObject() { oneChar('}'); }
/** Prints a true **/
inline void trueAtom()
void trueAtom()
{
const char * s = "true";
buffer.insert(s, s + 4);
}
/** Prints a false **/
inline void falseAtom()
void falseAtom()
{
const char * s = "false";
buffer.insert(s, s + 5);
}
/** Prints a null **/
inline void nullAtom()
void nullAtom()
{
const char * s = "null";
buffer.insert(s, s + 4);
}
/** Prints a number **/
inline void number(int64_t x)
void number(int64_t x)
{
char number_buffer[24];
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
buffer.insert(number_buffer, res.ptr);
}
/** Prints a number **/
inline void number(uint64_t x)
void number(uint64_t x)
{
char number_buffer[24];
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
buffer.insert(number_buffer, res.ptr);
}
/** Prints a number **/
inline void number(double x)
void number(double x)
{
char number_buffer[24];
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
buffer.insert(number_buffer, res.ptr);
}
/** Prints a key (string + colon) **/
inline void key(std::string_view unescaped)
void key(std::string_view unescaped)
{
string(unescaped);
oneChar(':');
}
/** Prints a string. The string is escaped as needed. **/
inline void string(std::string_view unescaped)
void string(std::string_view unescaped)
{
oneChar('\"');
size_t i = 0;
@ -165,7 +165,7 @@ public:
oneChar('\"');
}
inline void oneChar(char c)
void oneChar(char c)
{
buffer.push_back(c);
}
@ -182,7 +182,7 @@ class SimdJSONElementFormatter
public:
explicit SimdJSONElementFormatter(PaddedPODArray<UInt8> & buffer_) : format(buffer_) {}
/** Append an element to the builder (to be printed) **/
inline void append(simdjson::dom::element value)
void append(simdjson::dom::element value)
{
switch (value.type())
{
@ -224,7 +224,7 @@ public:
}
}
/** Append an array to the builder (to be printed) **/
inline void append(simdjson::dom::array value)
void append(simdjson::dom::array value)
{
format.startArray();
auto iter = value.begin();
@ -241,7 +241,7 @@ public:
format.endArray();
}
inline void append(simdjson::dom::object value)
void append(simdjson::dom::object value)
{
format.startObject();
auto pair = value.begin();
@ -258,7 +258,7 @@ public:
format.endObject();
}
inline void append(simdjson::dom::key_value_pair kv)
void append(simdjson::dom::key_value_pair kv)
{
format.key(kv.key);
append(kv.value);

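The number() overloads above lean on std::to_chars: it is locale-independent, never allocates, and returns the end of the written range instead of NUL-terminating, which is why the result is inserted via res.ptr. 24 bytes covers any 64-bit integer and the shortest round-trip form of any double (e.g. "-1.7976931348623157e+308"). A minimal standalone use:

#include <charconv>
#include <cstdio>

void printShortestDouble(double x)
{
    char buf[24]; /// large enough for the longest shortest-round-trip double
    auto res = std::to_chars(buf, buf + sizeof(buf), x);
    std::fwrite(buf, 1, static_cast<size_t>(res.ptr - buf), stdout);
}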
View File

@ -284,7 +284,7 @@ public:
}
template <typename It1, typename It2>
inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]])
void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]])
{
#if !defined(NDEBUG)
const char * ptr_begin = reinterpret_cast<const char *>(&*from_begin);

View File

@ -174,7 +174,7 @@ public:
items.emplace_back(std::make_shared<PooledObject>(allocObject(), *this));
}
inline size_t size()
size_t size()
{
std::lock_guard lock(mutex);
return items.size();

View File

@ -417,6 +417,13 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(DiskPlainRewritableAzureDirectoryCreated, "Number of directories created by the 'plain_rewritable' metadata storage for AzureObjectStorage.") \
M(DiskPlainRewritableAzureDirectoryRemoved, "Number of directories removed by the 'plain_rewritable' metadata storage for AzureObjectStorage.") \
M(DiskPlainRewritableLocalDirectoryCreated, "Number of directories created by the 'plain_rewritable' metadata storage for LocalObjectStorage.") \
M(DiskPlainRewritableLocalDirectoryRemoved, "Number of directories removed by the 'plain_rewritable' metadata storage for LocalObjectStorage.") \
M(DiskPlainRewritableS3DirectoryCreated, "Number of directories created by the 'plain_rewritable' metadata storage for S3ObjectStorage.") \
M(DiskPlainRewritableS3DirectoryRemoved, "Number of directories removed by the 'plain_rewritable' metadata storage for S3ObjectStorage.") \
\
M(S3Clients, "Number of created S3 clients.") \
M(TinyS3Clients, "Number of S3 clients copies which reuse an existing auth provider from another client.") \
\

View File

@ -385,7 +385,7 @@ private:
* PASS is counted from least significant (0), so the first pass is NUM_PASSES - 1.
*/
template <size_t PASS>
static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit)
static void radixSortMSDInternal(Element * arr, size_t size, size_t limit)
{
/// The beginning of every i-1-th bucket. 0th element will be equal to 1st.
/// Last element will point to array end.
@ -528,7 +528,7 @@ private:
// A helper to choose sorting algorithm based on array length
template <size_t PASS>
static inline void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit)
static void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit)
{
if (size <= INSERTION_SORT_THRESHOLD)
insertionSortInternal(arr, size);

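radixSortMSDInternalHelper is the classic hybrid-sort dispatch: below a small-size threshold the O(n^2) insertion sort wins on constants and cache behavior, above it the radix passes pay off. A generic sketch (the threshold value is illustrative, and std::sort stands in for radixSortMSDInternal<PASS>):

#include <algorithm>
#include <cstddef>
#include <utility>

constexpr size_t SMALL_ARRAY_THRESHOLD = 64; /// illustrative, not RadixSort's constant

template <typename T>
void hybridSort(T * arr, size_t size)
{
    if (size <= SMALL_ARRAY_THRESHOLD)
    {
        for (size_t i = 1; i < size; ++i) /// insertion sort for tiny inputs
            for (size_t j = i; j > 0 && arr[j] < arr[j - 1]; --j)
                std::swap(arr[j], arr[j - 1]);
        return;
    }
    std::sort(arr, arr + size); /// stand-in for the MSD radix pass
}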
View File

@ -131,12 +131,12 @@ public:
~SpaceSaving() { destroyElements(); }
inline size_t size() const
size_t size() const
{
return counter_list.size();
}
inline size_t capacity() const
size_t capacity() const
{
return m_capacity;
}

View File

@ -107,7 +107,7 @@ struct RUsageCounters
}
private:
static inline UInt64 getClockMonotonic()
static UInt64 getClockMonotonic()
{
struct timespec ts;
if (0 != clock_gettime(CLOCK_MONOTONIC, &ts))

View File

@ -54,16 +54,16 @@ namespace VolnitskyTraits
/// min haystack size to use main algorithm instead of fallback
static constexpr size_t min_haystack_size_for_algorithm = 20000;
static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
static bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
{
return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits<Offset>::max()
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm);
}
static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
static Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
template <typename Callback>
static inline bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase)
static bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase)
{
struct Chars
{
@ -115,7 +115,7 @@ namespace VolnitskyTraits
}
template <typename Callback>
static inline bool putNGramUTF8CaseInsensitive(
static bool putNGramUTF8CaseInsensitive(
const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase)
{
const UInt8 * end = begin + size;
@ -349,7 +349,7 @@ namespace VolnitskyTraits
}
template <bool CaseSensitive, bool ASCII, typename Callback>
static inline bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase)
static bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase)
{
if constexpr (CaseSensitive)
{
@ -580,7 +580,7 @@ public:
return true;
}
inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const
bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const
{
const size_t fallback_size = fallback_needles.size();
for (size_t i = 0; i < fallback_size; ++i)
@ -609,7 +609,7 @@ public:
return false;
}
inline size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const
size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const
{
const size_t fallback_size = fallback_needles.size();
@ -647,7 +647,7 @@ public:
}
template <typename CountCharsCallback>
inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const
UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const
{
const size_t fallback_size = fallback_needles.size();
@ -682,7 +682,7 @@ public:
}
template <typename CountCharsCallback, typename AnsType>
inline void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const
void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const
{
const size_t fallback_size = fallback_needles.size();
for (size_t i = 0; i < fallback_size; ++i)

View File

@ -491,12 +491,12 @@ public:
incrementErrorMetrics(code);
}
inline static Exception createDeprecated(const std::string & msg, Error code_)
static Exception createDeprecated(const std::string & msg, Error code_)
{
return Exception(msg, code_, 0);
}
inline static Exception fromPath(Error code_, const std::string & path)
static Exception fromPath(Error code_, const std::string & path)
{
return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path);
}
@ -504,7 +504,7 @@ public:
/// Message must be a compile-time constant
template <typename T>
requires std::is_convertible_v<T, String>
inline static Exception fromMessage(Error code_, T && message)
static Exception fromMessage(Error code_, T && message)
{
return Exception(std::forward<T>(message), code_);
}

View File

@ -855,13 +855,13 @@ template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { usi
template <> struct Field::EnumToType<Field::Types::CustomType> { using Type = CustomType; };
template <> struct Field::EnumToType<Field::Types::Bool> { using Type = UInt64; };
inline constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t)
constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t)
{
return t == Field::Types::Int64
|| t == Field::Types::UInt64;
}
inline constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t)
constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t)
{
return t == Field::Types::Int64
|| t == Field::Types::UInt64

View File

@ -19,16 +19,16 @@ enum class JoinKind : uint8_t
const char * toString(JoinKind kind);
inline constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; }
inline constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; }
inline constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; }
inline constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; }
inline constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; }
inline constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; }
inline constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; }
inline constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; }
inline constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; }
inline constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; }
constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; }
constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; }
constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; }
constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; }
constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; }
constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; }
constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; }
constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; }
constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; }
constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; }
/// Allows more optimal JOIN for typical cases.
enum class JoinStrictness : uint8_t
@ -66,7 +66,7 @@ enum class ASOFJoinInequality : uint8_t
const char * toString(ASOFJoinInequality asof_join_inequality);
inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name)
constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name)
{
ASOFJoinInequality inequality = ASOFJoinInequality::None;
@ -82,7 +82,7 @@ inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_
return inequality;
}
inline constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality)
constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality)
{
if (inequality == ASOFJoinInequality::Less)
return ASOFJoinInequality::Greater;
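
Here the specifier is redundant for a different reason: a constexpr function is implicitly inline, so `inline constexpr` at namespace scope says nothing extra (and since C++17 the same holds for static constexpr data members, which covers the DEFAULT_GRAPHITE_CONFIG_NAME change further down). A self-contained sketch with invented names:

enum class Side { Left, Right };

/// A constexpr function is implicitly inline, so defining this helper in a
/// header does not violate the one-definition rule.
constexpr bool isLeftSide(Side s) { return s == Side::Left; }

static_assert(isLeftSide(Side::Left));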

View File

@ -924,7 +924,7 @@ class IColumn;
M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.", 0) \
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \
M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect results", 0) \
M(Bool, allow_deprecated_functions, false, "Allow usage of deprecated functions", 0) \
M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.

View File

@ -85,7 +85,8 @@ namespace SettingsChangesHistory
/// It's used to implement the `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.6", {{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
{"24.6", {{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
{"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"},
{"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"},
{"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"},
@ -93,7 +94,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
}},
{"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"},
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
{"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"},
{"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."},
@ -101,7 +102,6 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."},
{"http_max_chunk_size", 0, 0, "Internal limitation"},
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."},
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
{"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"},
{"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"},
{"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"},

View File

@ -40,7 +40,7 @@ class BaseDaemon : public Poco::Util::ServerApplication, public Loggers
friend class SignalListener;
public:
static inline constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite";
static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite";
BaseDaemon();
~BaseDaemon() override;

View File

@ -147,7 +147,7 @@ public:
static T getScaleMultiplier(UInt32 scale);
inline DecimalUtils::DataTypeDecimalTrait<T> getTrait() const
DecimalUtils::DataTypeDecimalTrait<T> getTrait() const
{
return {precision, scale};
}

View File

@ -516,8 +516,14 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams(
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * /* cache */) const
SubstreamsCache * cache) const
{
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
{
column = cached_column;
return;
}
auto mutable_column = column->assumeMutable();
ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(*mutable_column);
@ -671,6 +677,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams(
}
column = std::move(mutable_column);
addToSubstreamsCache(cache, settings.path, column);
}
void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
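
The rewritten deserializeBinaryBulkWithMultipleStreams now participates in the substreams cache using the usual get-or-compute shape: return the cached column when the path was already deserialized, otherwise do the work and publish the result. A simplified sketch of that shape; the map-based cache here is an invented stand-in, not the real SubstreamsCache API:

#include <map>
#include <memory>
#include <string>

struct IColumnStub {};
using ColumnPtr = std::shared_ptr<const IColumnStub>;
using Cache = std::map<std::string, ColumnPtr>;   // keyed by substream path

ColumnPtr deserializeOrReuse(Cache * cache, const std::string & path)
{
    if (cache)
        if (auto it = cache->find(path); it != cache->end())
            return it->second;                    // fast path: already deserialized

    ColumnPtr column = std::make_shared<IColumnStub>();  // stands in for the real work
    if (cache)
        (*cache)[path] = column;                  // publish for sibling substreams
    return column;
}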

View File

@ -936,7 +936,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
query_context->setSetting("allow_experimental_window_functions", 1);
query_context->setSetting("allow_experimental_geo_types", 1);
query_context->setSetting("allow_experimental_map_type", 1);
query_context->setSetting("allow_deprecated_functions", 1);
query_context->setSetting("allow_deprecated_error_prone_window_functions", 1);
query_context->setSetting("allow_suspicious_low_cardinality_types", 1);
query_context->setSetting("allow_suspicious_fixed_string_types", 1);

View File

@ -754,7 +754,7 @@ private:
std::vector<Attribute> attributes;
inline void setCellDeadline(Cell & cell, TimePoint now)
void setCellDeadline(Cell & cell, TimePoint now)
{
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
{
@ -774,7 +774,7 @@ private:
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
}
inline size_t getCellIndex(const KeyType key) const
size_t getCellIndex(const KeyType key) const
{
const size_t hash = DefaultHash<KeyType>()(key);
const size_t index = hash & size_overlap_mask;
@ -783,7 +783,7 @@ private:
using KeyStateAndCellIndex = std::pair<KeyState::State, size_t>;
inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const
KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const
{
size_t place_value = getCellIndex(key);
const size_t place_value_end = place_value + max_collision_length;
@ -810,7 +810,7 @@ private:
return std::make_pair(KeyState::not_found, place_value & size_overlap_mask);
}
inline size_t getCellIndexForInsert(const KeyType & key) const
size_t getCellIndexForInsert(const KeyType & key) const
{
size_t place_value = getCellIndex(key);
const size_t place_value_end = place_value + max_collision_length;
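
getCellIndex only works because the number of cells is a power of two: masking with size - 1 is then equivalent to hash % size, without the integer division. A self-contained sketch with invented names; the power-of-two assumption is essential:

#include <cstddef>
#include <functional>

struct FixedCache
{
    /// cells_count must be a power of two for the mask trick to be valid.
    explicit FixedCache(std::size_t cells_count) : mask(cells_count - 1) {}

    std::size_t cellIndex(std::size_t key) const
    {
        const std::size_t hash = std::hash<std::size_t>{}(key);
        return hash & mask;   // same result as hash % cells_count
    }

    std::size_t mask;
};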

View File

@ -44,7 +44,7 @@ public:
{
}
inline bool isConstant() const { return default_values_column == nullptr; }
bool isConstant() const { return default_values_column == nullptr; }
Field getDefaultValue(size_t row) const
{
@ -450,17 +450,17 @@ public:
keys_size = key_columns.front()->size();
}
inline size_t getKeysSize() const
size_t getKeysSize() const
{
return keys_size;
}
inline size_t getCurrentKeyIndex() const
size_t getCurrentKeyIndex() const
{
return current_key_index;
}
inline KeyType extractCurrentKey()
KeyType extractCurrentKey()
{
assert(current_key_index < keys_size);

View File

@ -48,14 +48,14 @@ public:
};
private:
static inline constexpr const char * languages[] =
static constexpr const char * languages[] =
{
#define M(NAME, FALLBACK, NUM) #NAME,
FOR_EACH_LANGUAGE(M)
#undef M
};
static inline constexpr Language fallbacks[] =
static constexpr Language fallbacks[] =
{
#define M(NAME, FALLBACK, NUM) Language::FALLBACK,
FOR_EACH_LANGUAGE(M)

View File

@ -26,15 +26,15 @@ struct KeyState
: state(state_)
{}
inline bool isFound() const { return state == State::found; }
inline bool isExpired() const { return state == State::expired; }
inline bool isNotFound() const { return state == State::not_found; }
inline bool isDefault() const { return is_default; }
inline void setDefault() { is_default = true; }
inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; }
bool isFound() const { return state == State::found; }
bool isExpired() const { return state == State::expired; }
bool isNotFound() const { return state == State::not_found; }
bool isDefault() const { return is_default; }
void setDefault() { is_default = true; }
void setDefaultValue(bool is_default_value) { is_default = is_default_value; }
/// Valid only if keyState is found or expired
inline size_t getFetchedColumnIndex() const { return fetched_column_index; }
inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; }
size_t getFetchedColumnIndex() const { return fetched_column_index; }
void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; }
private:
State state = not_found;
size_t fetched_column_index = 0;

View File

@ -66,7 +66,7 @@ namespace
return buf;
}
inline UInt8 prefixIPv6() const
UInt8 prefixIPv6() const
{
return isv6 ? prefix : prefix + 96;
}

View File

@ -474,7 +474,7 @@ public:
}
// Checks if no more values can be added for a given attribute
inline bool full(const String & attr_name, std::unordered_set<String> * const defaults = nullptr) const
bool full(const String & attr_name, std::unordered_set<String> * const defaults = nullptr) const
{
if (collect_values_limit)
{
@ -490,7 +490,7 @@ public:
}
// Returns the number of full attributes
inline size_t attributesFull() const { return n_full_attributes; }
size_t attributesFull() const { return n_full_attributes; }
};
std::pair<String, bool> processBackRefs(const String & data, const re2::RE2 & searcher, const std::vector<StringPiece> & pieces)

View File

@ -134,7 +134,7 @@ public:
/// Reset the block with new block_data.
/// block_data must be zero-filled if it is a new block.
inline void reset(char * new_block_data)
void reset(char * new_block_data)
{
block_data = new_block_data;
current_block_offset = block_header_size;
@ -142,13 +142,13 @@ public:
}
/// Check whether there is enough space in the block to write the key
inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
{
return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size;
}
/// Check whether there is enough space in the block to write the key
inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
{
const StringRef & key = cache_key.key;
size_t complex_key_size = sizeof(key.size) + key.size;
@ -159,7 +159,7 @@ public:
/// Write the key and return the offset in the SSD cache block where the data was written.
/// It is the client's responsibility to check that there is enough space in the block to write the key.
/// Returns true if the key was written and false if there was not enough space.
inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
{
assert(cache_key.size > 0);
@ -188,7 +188,7 @@ public:
return true;
}
inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
{
assert(cache_key.size > 0);
@ -223,20 +223,20 @@ public:
return true;
}
inline size_t getKeysSize() const { return keys_size; }
size_t getKeysSize() const { return keys_size; }
/// Write keys size into block header
inline void writeKeysSize()
void writeKeysSize()
{
char * keys_size_offset_data = block_data + block_header_check_sum_size;
std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t));
}
/// Get check sum from block header
inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
/// Calculate check sum in block
inline size_t calculateCheckSum() const
size_t calculateCheckSum() const
{
size_t calculated_check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
@ -244,7 +244,7 @@ public:
}
/// Check whether the check sum from the block header matches the check sum calculated over the block
inline bool checkCheckSum() const
bool checkCheckSum() const
{
size_t calculated_check_sum = calculateCheckSum();
size_t check_sum = getCheckSum();
@ -253,16 +253,16 @@ public:
}
/// Write check sum in block header
inline void writeCheckSum()
void writeCheckSum()
{
size_t check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
std::memcpy(block_data, &check_sum, sizeof(size_t));
}
inline size_t getBlockSize() const { return block_size; }
size_t getBlockSize() const { return block_size; }
/// Returns block data
inline char * getBlockData() const { return block_data; }
char * getBlockData() const { return block_data; }
/// Read the keys that were serialized in the block.
/// It is the client's responsibility to ensure that simple or complex keys were written to the block.
@ -405,16 +405,16 @@ public:
current_write_block.writeCheckSum();
}
inline char * getPlace(SSDCacheIndex index) const
char * getPlace(SSDCacheIndex index) const
{
return buffer.m_data + index.block_index * block_size + index.offset_in_block;
}
inline size_t getCurrentBlockIndex() const { return current_block_index; }
size_t getCurrentBlockIndex() const { return current_block_index; }
inline const char * getData() const { return buffer.m_data; }
const char * getData() const { return buffer.m_data; }
inline size_t getSizeInBytes() const { return block_size * partition_blocks_size; }
size_t getSizeInBytes() const { return block_size * partition_blocks_size; }
void readKeys(PaddedPODArray<KeyType> & keys) const
{
@ -431,7 +431,7 @@ public:
}
}
inline void reset()
void reset()
{
current_block_index = 0;
current_write_block.reset(buffer.m_data);
@ -750,9 +750,9 @@ public:
}
}
inline size_t getCurrentBlockIndex() const { return current_block_index; }
size_t getCurrentBlockIndex() const { return current_block_index; }
inline void reset()
void reset()
{
current_block_index = 0;
}
@ -788,7 +788,7 @@ private:
int fd = -1;
};
inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
static int preallocateDiskSpace(int fd, size_t offset, size_t len)
{
#if defined(OS_FREEBSD)
return posix_fallocate(fd, offset, len);
@ -797,7 +797,7 @@ private:
#endif
}
inline static char * getRequestBuffer(const iocb & request)
static char * getRequestBuffer(const iocb & request)
{
char * result = nullptr;
@ -810,7 +810,7 @@ private:
return result;
}
inline static ssize_t eventResult(io_event & event)
static ssize_t eventResult(io_event & event)
{
ssize_t bytes_written;
@ -985,9 +985,9 @@ private:
size_t in_memory_partition_index;
CellState state;
inline bool isInMemory() const { return state == in_memory; }
inline bool isOnDisk() const { return state == on_disk; }
inline bool isDefaultValue() const { return state == default_value; }
bool isInMemory() const { return state == in_memory; }
bool isOnDisk() const { return state == on_disk; }
bool isDefaultValue() const { return state == default_value; }
};
struct KeyToBlockOffset
@ -1366,7 +1366,7 @@ private:
}
}
inline void setCellDeadline(Cell & cell, TimePoint now)
void setCellDeadline(Cell & cell, TimePoint now)
{
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
{
@ -1383,7 +1383,7 @@ private:
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
}
inline void eraseKeyFromIndex(KeyType key)
void eraseKeyFromIndex(KeyType key)
{
auto it = index.find(key);
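
The block helpers above assume a fixed header layout: a check sum in the first sizeof(size_t) bytes, then the key count, with the check sum computed over everything after itself. A compact sketch of that write/verify pair; FNV-1a is an invented stand-in for the CityHash64 call the real code uses:

#include <cstddef>
#include <cstdint>
#include <cstring>

/// Invented stand-in hash; the diff uses CityHash_v1_0_2::CityHash64.
uint64_t fnv1a(const char * data, size_t size)
{
    uint64_t h = 0xcbf29ce484222325ULL;
    for (size_t i = 0; i < size; ++i)
        h = (h ^ static_cast<unsigned char>(data[i])) * 0x100000001b3ULL;
    return h;
}

constexpr size_t check_sum_size = sizeof(uint64_t);

void writeCheckSum(char * block, size_t block_size)
{
    const uint64_t sum = fnv1a(block + check_sum_size, block_size - check_sum_size);
    std::memcpy(block, &sum, sizeof(sum));   // the header excludes itself from the hash
}

bool checkCheckSum(const char * block, size_t block_size)
{
    uint64_t stored;
    std::memcpy(&stored, block, sizeof(stored));
    return stored == fnv1a(block + check_sum_size, block_size - check_sum_size);
}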

View File

@ -350,6 +350,13 @@ public:
return delegate;
}
#if USE_AWS_S3
std::shared_ptr<const S3::Client> getS3StorageClient() const override
{
return delegate->getS3StorageClient();
}
#endif
private:
String wrappedPath(const String & path) const
{

View File

@ -14,7 +14,6 @@
#include <Disks/DirectoryIterator.h>
#include <memory>
#include <mutex>
#include <utility>
#include <boost/noncopyable.hpp>
#include <Poco/Timestamp.h>
@ -116,13 +115,18 @@ public:
/// Default constructor.
IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
: name(name_)
, copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, config.getUInt(config_prefix + ".thread_pool_size", 16))
, copying_thread_pool(
CurrentMetrics::IDiskCopierThreads,
CurrentMetrics::IDiskCopierThreadsActive,
CurrentMetrics::IDiskCopierThreadsScheduled,
config.getUInt(config_prefix + ".thread_pool_size", 16))
{
}
explicit IDisk(const String & name_)
: name(name_)
, copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16)
, copying_thread_pool(
CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16)
{
}
@ -466,6 +470,17 @@ public:
virtual DiskPtr getDelegateDiskIfExists() const { return nullptr; }
#if USE_AWS_S3
virtual std::shared_ptr<const S3::Client> getS3StorageClient() const
{
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Method getS3StorageClient() is not implemented for disk type: {}",
getDataSourceDescription().toString());
}
#endif
protected:
friend class DiskDecorator;
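
The getS3StorageClient additions across these disk classes share one shape: the interface supplies a throwing default so only S3-backed implementations must override it, and wrapper disks simply forward to their delegate. A reduced sketch with invented names (std::logic_error stands in for the NOT_IMPLEMENTED exception):

#include <memory>
#include <stdexcept>

struct Client {};  // stands in for S3::Client

struct IDiskLike
{
    virtual ~IDiskLike() = default;

    /// Non-S3 disks inherit this throwing default instead of each one
    /// declaring its own stub.
    virtual std::shared_ptr<const Client> getS3StorageClient() const
    {
        throw std::logic_error("getS3StorageClient() is not implemented for this disk type");
    }
};

struct WrapperDisk : IDiskLike
{
    std::shared_ptr<const Client> getS3StorageClient() const override
    {
        return delegate->getS3StorageClient();  // forward, as the delegating disk above does
    }

    std::shared_ptr<IDiskLike> delegate;
};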

View File

@ -61,12 +61,12 @@ private:
void monitorRing();
template<typename T> inline void failPromise(std::promise<T> & promise, const Exception & ex)
template<typename T> void failPromise(std::promise<T> & promise, const Exception & ex)
{
promise.set_exception(std::make_exception_ptr(ex));
}
inline std::future<Result> makeFailedResult(const Exception & ex)
std::future<Result> makeFailedResult(const Exception & ex)
{
auto promise = std::promise<Result>{};
failPromise(promise, ex);

View File

@ -127,6 +127,13 @@ public:
}
#endif
#if USE_AWS_S3
std::shared_ptr<const S3::Client> getS3StorageClient() override
{
return object_storage->getS3StorageClient();
}
#endif
private:
FileCacheKey getCacheKey(const std::string & path) const;

View File

@ -582,6 +582,12 @@ UInt64 DiskObjectStorage::getRevision() const
return metadata_helper->getRevision();
}
#if USE_AWS_S3
std::shared_ptr<const S3::Client> DiskObjectStorage::getS3StorageClient() const
{
return object_storage->getS3StorageClient();
}
#endif
DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const
{

View File

@ -6,6 +6,8 @@
#include <Disks/ObjectStorages/IMetadataStorage.h>
#include <Common/re2.h>
#include "config.h"
namespace CurrentMetrics
{
@ -210,6 +212,10 @@ public:
bool supportsChmod() const override { return metadata_storage->supportsChmod(); }
void chmod(const String & path, mode_t mode) override;
#if USE_AWS_S3
std::shared_ptr<const S3::Client> getS3StorageClient() const override;
#endif
private:
/// Create actual disk object storage transaction for operations

View File

@ -18,6 +18,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
const MetadataStorageMetrics & IObjectStorage::getMetadataStorageMetrics() const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getMetadataStorageMetrics' is not implemented");
}
bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
{
RelativePathsWithMetadata files;

View File

@ -1,10 +1,10 @@
#pragma once
#include <filesystem>
#include <string>
#include <map>
#include <mutex>
#include <optional>
#include <filesystem>
#include <Poco/Timestamp.h>
#include <Poco/Util/AbstractConfiguration.h>
@ -13,17 +13,18 @@
#include <IO/WriteSettings.h>
#include <IO/copyData.h>
#include <Disks/ObjectStorages/StoredObject.h>
#include <Disks/DiskType.h>
#include <Common/ThreadPool_fwd.h>
#include <Common/ObjectStorageKey.h>
#include <Disks/WriteMode.h>
#include <Interpreters/Context_fwd.h>
#include <Core/Types.h>
#include <Disks/DirectoryIterator.h>
#include <Common/ThreadPool.h>
#include <Common/threadPoolCallbackRunner.h>
#include <Disks/DiskType.h>
#include <Disks/ObjectStorages/MetadataStorageMetrics.h>
#include <Disks/ObjectStorages/StoredObject.h>
#include <Disks/WriteMode.h>
#include <Interpreters/Context_fwd.h>
#include <Common/Exception.h>
#include <Common/ObjectStorageKey.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <Common/threadPoolCallbackRunner.h>
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
@ -31,6 +32,10 @@
#include <azure/storage/blobs.hpp>
#endif
#if USE_AWS_S3
#include <IO/S3/Client.h>
#endif
namespace DB
{
@ -111,6 +116,8 @@ public:
virtual std::string getDescription() const = 0;
virtual const MetadataStorageMetrics & getMetadataStorageMetrics() const;
/// Object exists or not
virtual bool exists(const StoredObject & object) const = 0;
@ -257,6 +264,13 @@ public:
}
#endif
#if USE_AWS_S3
virtual std::shared_ptr<const S3::Client> getS3StorageClient()
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for S3ObjectStorage");
}
#endif
private:
mutable std::mutex throttlers_mutex;

View File

@ -52,11 +52,16 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
[[maybe_unused]] auto result = path_map.emplace(path, std::move(key_prefix));
chassert(result.second);
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, 1);
writeString(path.string(), *buf);
buf->finalize();
write_finalized = true;
auto event = object_storage->getMetadataStorageMetrics().directory_created;
ProfileEvents::increment(event);
}
void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock<SharedMutex> &)
@ -65,6 +70,9 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::un
if (write_finalized)
{
path_map.erase(path);
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, 1);
object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
}
else if (write_created)
@ -165,7 +173,15 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std:
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
object_storage->removeObject(object);
path_map.erase(path_it);
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, 1);
removed = true;
auto event = object_storage->getMetadataStorageMetrics().directory_removed;
ProfileEvents::increment(event);
}
void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::unique_lock<SharedMutex> &)
@ -185,6 +201,8 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un
buf->finalize();
path_map.emplace(path, std::move(key_prefix));
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, 1);
}
}
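
Every mutation of path_map in these operations is now paired with a CurrentMetrics update, including the undo paths, so the directory_map_size gauge tracks the in-memory map exactly even when an operation rolls back. The invariant, reduced to a sketch with invented names (an atomic counter stands in for the CurrentMetrics gauge):

#include <atomic>
#include <map>
#include <string>

std::atomic<long> directory_map_size{0};   // stand-in for the CurrentMetrics gauge

using PathMap = std::map<std::string, std::string>;

void createDirectory(PathMap & map, const std::string & path, std::string key_prefix)
{
    if (map.emplace(path, std::move(key_prefix)).second)
        directory_map_size.fetch_add(1);   // bump exactly when the map grows
}

void undoCreate(PathMap & map, const std::string & path)
{
    if (map.erase(path))
        directory_map_size.fetch_sub(1);   // the undo path keeps the gauge in sync too
}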

View File

@ -50,6 +50,8 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
res.first->second,
remote_path.parent_path().string());
}
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, result.size());
return result;
}
@ -134,6 +136,12 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita
object_storage->setKeysGenerator(keys_gen);
}
MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewritableObjectStorage()
{
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, path_map->size());
}
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const
{

View File

@ -14,6 +14,7 @@ private:
public:
MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_);
~MetadataStorageFromPlainRewritableObjectStorage() override;
MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; }

View File

@ -0,0 +1,24 @@
#pragma once
#include <Disks/DiskType.h>
#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
namespace DB
{
struct MetadataStorageMetrics
{
const ProfileEvents::Event directory_created = ProfileEvents::end();
const ProfileEvents::Event directory_removed = ProfileEvents::end();
CurrentMetrics::Metric directory_map_size = CurrentMetrics::end();
template <typename ObjectStorage, MetadataStorageType metadata_type>
static MetadataStorageMetrics create()
{
return MetadataStorageMetrics{};
}
};
}

View File

@ -23,6 +23,7 @@
#include <Disks/ObjectStorages/MetadataStorageFactory.h>
#include <Disks/ObjectStorages/PlainObjectStorage.h>
#include <Disks/ObjectStorages/PlainRewritableObjectStorage.h>
#include <Disks/ObjectStorages/createMetadataStorageMetrics.h>
#include <Interpreters/Context.h>
#include <Common/Macros.h>
@ -85,7 +86,9 @@ ObjectStoragePtr createObjectStorage(
DataSourceDescription{DataSourceType::ObjectStorage, type, MetadataStorageType::PlainRewritable, /*description*/ ""}
.toString());
return std::make_shared<PlainRewritableObjectStorage<BaseObjectStorage>>(std::forward<Args>(args)...);
auto metadata_storage_metrics = DB::MetadataStorageMetrics::create<BaseObjectStorage, MetadataStorageType::PlainRewritable>();
return std::make_shared<PlainRewritableObjectStorage<BaseObjectStorage>>(
std::move(metadata_storage_metrics), std::forward<Args>(args)...);
}
else
return std::make_shared<BaseObjectStorage>(std::forward<Args>(args)...);
@ -256,8 +259,9 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory)
auto client = getClient(config, config_prefix, context, *settings, true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto metadata_storage_metrics = DB::MetadataStorageMetrics::create<S3ObjectStorage, MetadataStorageType::PlainRewritable>();
auto object_storage = std::make_shared<PlainRewritableObjectStorage<S3ObjectStorage>>(
std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
std::move(metadata_storage_metrics), std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)

View File

@ -16,8 +16,9 @@ class PlainRewritableObjectStorage : public BaseObjectStorage
{
public:
template <class... Args>
explicit PlainRewritableObjectStorage(Args &&... args)
explicit PlainRewritableObjectStorage(MetadataStorageMetrics && metadata_storage_metrics_, Args &&... args)
: BaseObjectStorage(std::forward<Args>(args)...)
, metadata_storage_metrics(std::move(metadata_storage_metrics_))
/// A basic key generator is required for checking S3 capabilities,
/// it will be reset later by metadata storage.
, key_generator(createObjectStorageKeysGeneratorAsIsWithPrefix(BaseObjectStorage::getCommonKeyPrefix()))
@ -26,6 +27,8 @@ public:
std::string getName() const override { return "PlainRewritable" + BaseObjectStorage::getName(); }
const MetadataStorageMetrics & getMetadataStorageMetrics() const override { return metadata_storage_metrics; }
bool isWriteOnce() const override { return false; }
bool isPlain() const override { return true; }
@ -37,6 +40,7 @@ public:
void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; }
private:
MetadataStorageMetrics metadata_storage_metrics;
ObjectStorageKeysGeneratorPtr key_generator;
};
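
Threading the metrics through PlainRewritableObjectStorage uses a common trick: prepend one concrete parameter to an otherwise perfectly-forwarding constructor, so every existing base-storage constructor signature keeps working unchanged. In isolation, with invented names:

#include <string>
#include <utility>

struct Base
{
    explicit Base(std::string name_) : name(std::move(name_)) {}
    std::string name;
};

struct Metrics { int id = 0; };

struct Derived : Base
{
    /// The first argument is consumed here; the rest forward to any Base constructor.
    template <class... Args>
    explicit Derived(Metrics && metrics_, Args &&... args)
        : Base(std::forward<Args>(args)...)
        , metrics(std::move(metrics_))
    {
    }

    Metrics metrics;
};

// Usage: Derived d(Metrics{42}, "disk_name");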

View File

@ -495,13 +495,14 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT
try
{
copyS3File(
current_client,
uri.bucket,
object_from.remote_path,
0,
size,
dest_s3->uri.bucket,
object_to.remote_path,
/*src_s3_client=*/current_client,
/*src_bucket=*/uri.bucket,
/*src_key=*/object_from.remote_path,
/*src_offset=*/0,
/*src_size=*/size,
/*dest_s3_client=*/current_client,
/*dest_bucket=*/dest_s3->uri.bucket,
/*dest_key=*/object_to.remote_path,
settings_ptr->request_settings,
patchSettings(read_settings),
BlobStorageLogWriter::create(disk_name),
@ -535,13 +536,15 @@ void S3ObjectStorage::copyObject( // NOLINT
auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings);
auto scheduler = threadPoolCallbackRunnerUnsafe<void>(getThreadPoolWriter(), "S3ObjStor_copy");
copyS3File(current_client,
uri.bucket,
object_from.remote_path,
0,
size,
uri.bucket,
object_to.remote_path,
copyS3File(
/*src_s3_client=*/current_client,
/*src_bucket=*/uri.bucket,
/*src_key=*/object_from.remote_path,
/*src_offset=*/0,
/*src_size=*/size,
/*dest_s3_client=*/current_client,
/*dest_bucket=*/uri.bucket,
/*dest_key=*/object_to.remote_path,
settings_ptr->request_settings,
patchSettings(read_settings),
BlobStorageLogWriter::create(disk_name),
@ -617,6 +620,11 @@ ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & p
return key_generator->generate(path, /* is_directory */ false);
}
std::shared_ptr<const S3::Client> S3ObjectStorage::getS3StorageClient()
{
return client.get();
}
}
#endif
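
The reworked copyS3File calls adopt the /*param=*/ comment idiom: with this many positional arguments, several of them identically typed, naming each argument at the call site is the cheapest guard against silently swapping a source and a destination. The shape in isolation, with an invented function:

#include <string>

void copyFile(
    const std::string & src_bucket, const std::string & src_key,
    const std::string & dest_bucket, const std::string & dest_key)
{
    // body irrelevant to the idiom
    (void)src_bucket; (void)src_key; (void)dest_bucket; (void)dest_key;
}

void example()
{
    copyFile(
        /*src_bucket=*/"from-bucket",
        /*src_key=*/"a/b.bin",
        /*dest_bucket=*/"to-bucket",
        /*dest_key=*/"a/b.bin");   // any accidental swap is visible in review
}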

View File

@ -168,6 +168,7 @@ public:
bool isReadOnly() const override { return s3_settings.get()->read_only; }
std::shared_ptr<const S3::Client> getS3StorageClient() override;
private:
void setNewSettings(std::unique_ptr<S3ObjectStorageSettings> && s3_settings_);

View File

@ -3,6 +3,8 @@
#include "config.h"
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <filesystem>
#include <shared_mutex>
namespace Poco

View File

@ -0,0 +1,67 @@
#pragma once
#if USE_AWS_S3
# include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
#endif
#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
# include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
#endif
#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
# include <Disks/ObjectStorages/Local/LocalObjectStorage.h>
#endif
#include <Disks/ObjectStorages/MetadataStorageMetrics.h>
namespace ProfileEvents
{
extern const Event DiskPlainRewritableAzureDirectoryCreated;
extern const Event DiskPlainRewritableAzureDirectoryRemoved;
extern const Event DiskPlainRewritableLocalDirectoryCreated;
extern const Event DiskPlainRewritableLocalDirectoryRemoved;
extern const Event DiskPlainRewritableS3DirectoryCreated;
extern const Event DiskPlainRewritableS3DirectoryRemoved;
}
namespace CurrentMetrics
{
extern const Metric DiskPlainRewritableAzureDirectoryMapSize;
extern const Metric DiskPlainRewritableLocalDirectoryMapSize;
extern const Metric DiskPlainRewritableS3DirectoryMapSize;
}
namespace DB
{
#if USE_AWS_S3
template <>
inline MetadataStorageMetrics MetadataStorageMetrics::create<S3ObjectStorage, MetadataStorageType::PlainRewritable>()
{
return MetadataStorageMetrics{
.directory_created = ProfileEvents::DiskPlainRewritableS3DirectoryCreated,
.directory_removed = ProfileEvents::DiskPlainRewritableS3DirectoryRemoved,
.directory_map_size = CurrentMetrics::DiskPlainRewritableS3DirectoryMapSize};
}
#endif
#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
template <>
inline MetadataStorageMetrics MetadataStorageMetrics::create<AzureObjectStorage, MetadataStorageType::PlainRewritable>()
{
return MetadataStorageMetrics{
.directory_created = ProfileEvents::DiskPlainRewritableAzureDirectoryCreated,
.directory_removed = ProfileEvents::DiskPlainRewritableAzureDirectoryRemoved,
.directory_map_size = CurrentMetrics::DiskPlainRewritableAzureDirectoryMapSize};
}
#endif
#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
template <>
inline MetadataStorageMetrics MetadataStorageMetrics::create<LocalObjectStorage, MetadataStorageType::PlainRewritable>()
{
return MetadataStorageMetrics{
.directory_created = ProfileEvents::DiskPlainRewritableLocalDirectoryCreated,
.directory_removed = ProfileEvents::DiskPlainRewritableLocalDirectoryRemoved,
.directory_map_size = CurrentMetrics::DiskPlainRewritableLocalDirectoryMapSize};
}
#endif
}
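
createMetadataStorageMetrics.h pairs the generic MetadataStorageMetrics::create(), which leaves every counter at its end() sentinel, with one full specialization per storage type, so the metric set is selected at compile time from the template arguments. A self-contained miniature of the same dispatch (all names invented except the event name, which is reused for flavor):

#include <cstdio>

enum class StorageKind { S3, Local };

struct Metrics
{
    const char * directory_created = "<unset>";   // plays the role of the end() sentinel

    template <StorageKind kind>
    static Metrics create() { return Metrics{}; } // generic fallback: sentinels only
};

template <>
Metrics Metrics::create<StorageKind::S3>()        // the full specialization wins
{
    return Metrics{.directory_created = "DiskPlainRewritableS3DirectoryCreated"};
}

int main()
{
    std::printf("%s\n", Metrics::create<StorageKind::S3>().directory_created);
    std::printf("%s\n", Metrics::create<StorageKind::Local>().directory_created);
}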

View File

@ -68,7 +68,7 @@ struct DivideIntegralImpl
static const constexpr bool allow_string_integer = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
static Result apply(A a, B b)
{
using CastA = std::conditional_t<is_big_int_v<B> && std::is_same_v<A, UInt8>, uint8_t, A>;
using CastB = std::conditional_t<is_big_int_v<A> && std::is_same_v<B, UInt8>, uint8_t, B>;
@ -120,7 +120,7 @@ struct ModuloImpl
static const constexpr bool allow_string_integer = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
static Result apply(A a, B b)
{
if constexpr (std::is_floating_point_v<ResultType>)
{
@ -175,7 +175,7 @@ struct PositiveModuloImpl : ModuloImpl<A, B>
using ResultType = typename NumberTraits::ResultOfPositiveModulo<A, B>::Type;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
static Result apply(A a, B b)
{
auto res = ModuloImpl<A, B>::template apply<OriginResultType>(a, b);
if constexpr (is_signed_v<A>)
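
PositiveModuloImpl builds on ModuloImpl because the C++ % operator truncates toward zero: a negative dividend yields a result with the dividend's sign, so a non-negative modulo needs one corrective add. Presumably that is what the branch truncated above does for signed operands; a minimal sketch of the adjustment:

#include <cassert>
#include <cstdlib>

long long positiveModulo(long long a, long long b)
{
    long long res = a % b;          // truncating: -7 % 3 == -1 in C++
    if (res < 0)
        res += std::llabs(b);       // shift into [0, |b|)
    return res;
}

int main()
{
    assert(positiveModulo(-7, 3) == 2);
    assert(positiveModulo(7, 3) == 1);
    assert(positiveModulo(-7, -3) == 2);
}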

View File

@ -284,7 +284,7 @@ struct BinaryOperation
private:
template <OpCase op_case>
static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i)
static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i)
{
if constexpr (op_case == OpCase::Vector)
c[i] = Op::template apply<ResultType>(a[i], b[i]);
@ -432,7 +432,7 @@ template <typename Op>
struct FixedStringReduceOperationImpl
{
template <OpCase op_case>
static void inline process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N)
static void process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N)
{
if constexpr (op_case == OpCase::Vector)
vectorVector(a, b, result, size, N);
@ -503,7 +503,7 @@ struct StringReduceOperationImpl
}
}
static inline UInt64 constConst(std::string_view a, std::string_view b)
static UInt64 constConst(std::string_view a, std::string_view b)
{
return process(
reinterpret_cast<const UInt8 *>(a.data()),
@ -643,7 +643,7 @@ public:
private:
template <OpCase op_case, typename ApplyFunc>
static inline void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func)
static void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func)
{
if (right_nullmap)
{

Some files were not shown because too many files have changed in this diff.