Merge remote-tracking branch 'origin/master' into pr-right-joins

Igor Nikonov 2024-11-07 12:17:24 +00:00
commit 3c37c1e6c4
151 changed files with 1810 additions and 1347 deletions

View File

@ -1,7 +1,7 @@
# The Dockerfile.ubuntu exists for the tests/ci/docker_server.py script
# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically,
# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes
FROM ubuntu:20.04 AS glibc-donor
FROM ubuntu:22.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
@ -9,7 +9,11 @@ RUN arch=${TARGETARCH:-amd64} \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu \
&& case $arch in \
amd64) ln /lib/linux-gnu/ld-linux-x86-64.so.2 /lib/linux-gnu/ld-2.35.so ;; \
arm64) ln /lib/linux-gnu/ld-linux-aarch64.so.1 /lib/linux-gnu/ld-2.35.so ;; \
esac
FROM alpine
@ -20,7 +24,7 @@ ENV LANG=en_US.UTF-8 \
TZ=UTC \
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.35.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY entrypoint.sh /entrypoint.sh

View File

@ -1,4 +1,4 @@
FROM ubuntu:20.04
FROM ubuntu:22.04
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
# It could be removed after we move to a version 23.04+

View File

@ -20,6 +20,7 @@ For more information and documentation see https://clickhouse.com/.
- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A) and additionally the Load-Acquire RCpc register. The register is optional in version ARMv8.2-A and mandatory in [ARMv8.3-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.3-A). Supported in Graviton >=2, Azure and GCP instances. Examples for unsupported devices are Raspberry Pi 4 (ARMv8.0-A) and Jetson AGX Xavier/Orin (ARMv8.2-A).
- Since ClickHouse 24.11, the Ubuntu images use `ubuntu:22.04` as their base image. It requires Docker version >= `20.10.10`, which contains this [patch](https://github.com/moby/moby/commit/977283509f75303bc6612665a04abf76ff1d2468). As a workaround you could use `docker run [--privileged | --security-opt seccomp=unconfined]` instead, however that has security implications.
## How to use this image

View File

@ -1,16 +0,0 @@
# Since right now we can't set volumes to the docker during build, we split building container in stages:
# 1. build base container
# 2. run base container with mounted volumes
# 3. commit container as image
FROM ubuntu:20.04 as clickhouse-test-runner-base
# A volume where directory with clickhouse packages to be mounted,
# for later installing.
VOLUME /packages
CMD apt-get update ;\
DEBIAN_FRONTEND=noninteractive \
apt install -y /packages/clickhouse-common-static_*.deb \
/packages/clickhouse-client_*.deb \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -4,9 +4,13 @@ sidebar_position: 50
sidebar_label: EmbeddedRocksDB
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
# EmbeddedRocksDB Engine
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
<CloudNotSupportedBadge />
This engine allows integrating ClickHouse with [RocksDB](http://rocksdb.org/).
## Creating a Table {#creating-a-table}
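As a quick orientation before the full syntax, a minimal sketch of such a table (illustrative table and column names; this engine requires a `PRIMARY KEY`):
```sql
CREATE TABLE rocksdb_table
(
    key String,
    value UInt32
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY key;
```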

View File

@ -9,7 +9,7 @@ sidebar_label: Prometheus protocols
## Exposing metrics {#expose}
:::note
ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
If you are using ClickHouse Cloud, you can expose metrics to Prometheus using the [Prometheus Integration](/en/integrations/prometheus).
:::
ClickHouse can expose its own metrics for scraping from Prometheus:

View File

@ -65,6 +65,34 @@ sudo rm -f /etc/yum.repos.d/clickhouse.repo
After that, follow the [install guide](../getting-started/install.md#from-rpm-packages)
### You Can't Run Docker Container
You are running a simple `docker run clickhouse/clickhouse-server` and it crashes with a stack trace similar to the following:
```
$ docker run -it clickhouse/clickhouse-server
........
2024.11.06 21:04:48.912036 [ 1 ] {} <Information> SentryWriter: Sending crash reports is disabled
Poco::Exception. Code: 1000, e.code() = 0, System exception: cannot start thread, Stack trace (when copying this message, always include the lines below):
0. Poco::ThreadImpl::startImpl(Poco::SharedPtr<Poco::Runnable, Poco::ReferenceCounter, Poco::ReleasePolicy<Poco::Runnable>>) @ 0x00000000157c7b34
1. Poco::Thread::start(Poco::Runnable&) @ 0x00000000157c8a0e
2. BaseDaemon::initializeTerminationAndSignalProcessing() @ 0x000000000d267a14
3. BaseDaemon::initialize(Poco::Util::Application&) @ 0x000000000d2652cb
4. DB::Server::initialize(Poco::Util::Application&) @ 0x000000000d128b38
5. Poco::Util::Application::run() @ 0x000000001581cfda
6. DB::Server::run() @ 0x000000000d1288f0
7. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000015825e27
8. mainEntryClickHouseServer(int, char**) @ 0x000000000d125b38
9. main @ 0x0000000007ea4eee
10. ? @ 0x00007f67ff946d90
11. ? @ 0x00007f67ff946e40
12. _start @ 0x00000000062e802e
(version 24.10.1.2812 (official build))
```
The reason is an old Docker daemon with a version lower than `20.10.10`. To fix it, either upgrade the daemon or run `docker run [--privileged | --security-opt seccomp=unconfined]`. The latter has security implications.
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible issues:

View File

@ -19,7 +19,7 @@ Columns:
- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a column to which access is granted.
- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether some privileges have been revoked. Possible values:
- `0` — The row describes a partial revoke.
- `1` — The row describes a grant.
- `0` — The row describes a grant.
- `1` — The row describes a partial revoke.
- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#granting-privilege-syntax).
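For illustration, a minimal sketch with a hypothetical user `alice` and database `db`: a broad grant narrowed by a revoke yields one row of each kind in `system.grants`.
```sql
GRANT SELECT ON db.* TO alice;
REVOKE SELECT ON db.secrets FROM alice;

SELECT access_type, database, `table`, is_partial_revoke
FROM system.grants
WHERE user_name = 'alice';
```
Here the row for `db.*` carries `is_partial_revoke = 0` (a grant), while the row for `db.secrets` carries `is_partial_revoke = 1` (a partial revoke).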

View File

@ -58,10 +58,10 @@ SELECT json FROM test;
└───────────────────────────────────┘
```
Using CAST from 'String':
Using CAST from `String`:
```sql
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json;
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json;
```
```text
@ -70,7 +70,47 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json
└────────────────────────────────────────────────┘
```
CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later.
Using CAST from `Tuple`:
```sql
SELECT (tuple(42 AS b) AS a, [1, 2, 3] AS c, 'Hello, World!' AS d)::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
└────────────────────────────────────────────────┘
```
Using CAST from `Map`:
```sql
SELECT map('a', map('b', 42), 'c', [1,2,3], 'd', 'Hello, World!')::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
└────────────────────────────────────────────────┘
```
Using CAST from deprecated `Object('json')`:
```sql
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::Object('json')::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
└────────────────────────────────────────────────┘
```
:::note
CAST from `Tuple`/`Map`/`Object('json')` to `JSON` is implemented by serializing the column into a `String` column containing JSON objects and deserializing it back into a `JSON` type column.
:::
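A minimal sketch of that equivalence, using the existing `toJSONString` function (names are illustrative): both expressions below produce the same `JSON` value.
```sql
SELECT map('a', 42)::JSON AS direct,
       toJSONString(map('a', 42))::JSON AS via_string;
```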
CAST between `JSON` types with different arguments will be supported later.
## Reading JSON paths as subcolumns

View File

@ -291,7 +291,7 @@ All missed values of `expr` column will be filled sequentially and other columns
To fill multiple columns, add the `WITH FILL` modifier with optional parameters after each field name in the `ORDER BY` section.
``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr] [STALENESS const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr] [STALENESS numeric_expr]
[INTERPOLATE [(col [AS expr], ... colN [AS exprN])]]
```
@ -300,6 +300,7 @@ When `FROM const_expr` is not defined, the sequence of filling uses the minimal `expr` field
When `TO const_expr` is not defined, the sequence of filling uses the maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted as is for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` is omitted, the sequence of filling uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
When `STALENESS const_numeric_expr` is defined, the query will generate rows until the difference from the previous row in the original data exceeds `const_numeric_expr`.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on the previous field values by applying `expr`. If `expr` is not present, the previous value is repeated. An omitted list results in including all allowed columns.
Example of a query without `WITH FILL`:
@ -497,6 +498,64 @@ Result:
└────────────┴────────────┴──────────┘
```
Example of a query without `STALENESS`:
``` sql
SELECT number as key, 5 * number value, 'original' AS source
FROM numbers(16) WHERE key % 5 == 0
ORDER BY key WITH FILL;
```
Result:
``` text
┌─key─┬─value─┬─source───┐
1. │ 0 │ 0 │ original │
2. │ 1 │ 0 │ │
3. │ 2 │ 0 │ │
4. │ 3 │ 0 │ │
5. │ 4 │ 0 │ │
6. │ 5 │ 25 │ original │
7. │ 6 │ 0 │ │
8. │ 7 │ 0 │ │
9. │ 8 │ 0 │ │
10. │ 9 │ 0 │ │
11. │ 10 │ 50 │ original │
12. │ 11 │ 0 │ │
13. │ 12 │ 0 │ │
14. │ 13 │ 0 │ │
15. │ 14 │ 0 │ │
16. │ 15 │ 75 │ original │
└─────┴───────┴──────────┘
```
Same query after applying `STALENESS 3`:
``` sql
SELECT number as key, 5 * number value, 'original' AS source
FROM numbers(16) WHERE key % 5 == 0
ORDER BY key WITH FILL STALENESS 3;
```
Result:
``` text
┌─key─┬─value─┬─source───┐
1. │ 0 │ 0 │ original │
2. │ 1 │ 0 │ │
3. │ 2 │ 0 │ │
4. │ 5 │ 25 │ original │
5. │ 6 │ 0 │ │
6. │ 7 │ 0 │ │
7. │ 10 │ 50 │ original │
8. │ 11 │ 0 │ │
9. │ 12 │ 0 │ │
10. │ 15 │ 75 │ original │
11. │ 16 │ 0 │ │
12. │ 17 │ 0 │ │
└─────┴───────┴──────────┘
```
Example of a query without `INTERPOLATE`:
``` sql

View File

@ -387,7 +387,7 @@ template <typename Value, bool return_float, bool interpolated>
using FuncQuantileExactWeighted = AggregateFunctionQuantile<
Value,
QuantileExactWeighted<Value, interpolated>,
NameQuantileExactWeighted,
std::conditional_t<interpolated, NameQuantileExactWeightedInterpolated, NameQuantileExactWeighted>,
true,
std::conditional_t<return_float, Float64, void>,
false,
@ -396,7 +396,7 @@ template <typename Value, bool return_float, bool interpolated>
using FuncQuantilesExactWeighted = AggregateFunctionQuantile<
Value,
QuantileExactWeighted<Value, interpolated>,
NameQuantilesExactWeighted,
std::conditional_t<interpolated, NameQuantilesExactWeightedInterpolated, NameQuantilesExactWeighted>,
true,
std::conditional_t<return_float, Float64, void>,
true,

View File

@ -498,6 +498,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
sort_node->getFillTo() = buildExpression(order_by_element.getFillTo(), context);
if (order_by_element.getFillStep())
sort_node->getFillStep() = buildExpression(order_by_element.getFillStep(), context);
if (order_by_element.getFillStaleness())
sort_node->getFillStaleness() = buildExpression(order_by_element.getFillStaleness(), context);
list_node->getNodes().push_back(std::move(sort_node));
}

View File

@ -437,8 +437,13 @@ ProjectionName QueryAnalyzer::calculateWindowProjectionName(const QueryTreeNodeP
return buffer.str();
}
ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(const QueryTreeNodePtr & sort_column_node, const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name, const ProjectionName & fill_to_expression_projection_name, const ProjectionName & fill_step_expression_projection_name)
ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(
const QueryTreeNodePtr & sort_column_node,
const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name,
const ProjectionName & fill_to_expression_projection_name,
const ProjectionName & fill_step_expression_projection_name,
const ProjectionName & fill_staleness_expression_projection_name)
{
auto & sort_node_typed = sort_column_node->as<SortNode &>();
@ -468,6 +473,9 @@ ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(const QueryTreeN
if (sort_node_typed.hasFillStep())
sort_column_projection_name_buffer << " STEP " << fill_step_expression_projection_name;
if (sort_node_typed.hasFillStaleness())
sort_column_projection_name_buffer << " STALENESS " << fill_staleness_expression_projection_name;
}
return sort_column_projection_name_buffer.str();
@ -3998,6 +4006,7 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
ProjectionNames fill_from_expression_projection_names;
ProjectionNames fill_to_expression_projection_names;
ProjectionNames fill_step_expression_projection_names;
ProjectionNames fill_staleness_expression_projection_names;
auto & sort_node_list_typed = sort_node_list->as<ListNode &>();
for (auto & node : sort_node_list_typed.getNodes())
@ -4088,11 +4097,38 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
fill_step_expression_projection_names_size);
}
if (sort_node.hasFillStaleness())
{
fill_staleness_expression_projection_names = resolveExpressionNode(sort_node.getFillStaleness(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
const auto * constant_node = sort_node.getFillStaleness()->as<ConstantNode>();
if (!constant_node)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Sort FILL STALENESS expression must be constant with numeric or interval type. Actual {}. In scope {}",
sort_node.getFillStaleness()->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
bool is_number = isColumnedAsNumber(constant_node->getResultType());
bool is_interval = WhichDataType(constant_node->getResultType()).isInterval();
if (!is_number && !is_interval)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Sort FILL STALENESS expression must be constant with numeric or interval type. Actual {}. In scope {}",
sort_node.getFillStaleness()->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
size_t fill_staleness_expression_projection_names_size = fill_staleness_expression_projection_names.size();
if (fill_staleness_expression_projection_names_size != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Sort FILL STALENESS expression expected 1 projection name. Actual {}",
fill_staleness_expression_projection_names_size);
}
auto sort_column_projection_name = calculateSortColumnProjectionName(node,
sort_expression_projection_names[0],
fill_from_expression_projection_names.empty() ? "" : fill_from_expression_projection_names.front(),
fill_to_expression_projection_names.empty() ? "" : fill_to_expression_projection_names.front(),
fill_step_expression_projection_names.empty() ? "" : fill_step_expression_projection_names.front());
fill_step_expression_projection_names.empty() ? "" : fill_step_expression_projection_names.front(),
fill_staleness_expression_projection_names.empty() ? "" : fill_staleness_expression_projection_names.front());
result_projection_names.push_back(std::move(sort_column_projection_name));
@ -4100,6 +4136,7 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
fill_from_expression_projection_names.clear();
fill_to_expression_projection_names.clear();
fill_step_expression_projection_names.clear();
fill_staleness_expression_projection_names.clear();
}
return result_projection_names;

View File

@ -140,7 +140,8 @@ private:
const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name,
const ProjectionName & fill_to_expression_projection_name,
const ProjectionName & fill_step_expression_projection_name);
const ProjectionName & fill_step_expression_projection_name,
const ProjectionName & fill_staleness_expression_projection_name);
QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context);

View File

@ -69,6 +69,12 @@ void SortNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si
buffer << '\n' << std::string(indent + 2, ' ') << "FILL STEP\n";
getFillStep()->dumpTreeImpl(buffer, format_state, indent + 4);
}
if (hasFillStaleness())
{
buffer << '\n' << std::string(indent + 2, ' ') << "FILL STALENESS\n";
getFillStaleness()->dumpTreeImpl(buffer, format_state, indent + 4);
}
}
bool SortNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const
@ -132,6 +138,8 @@ ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const
result->setFillTo(getFillTo()->toAST(options));
if (hasFillStep())
result->setFillStep(getFillStep()->toAST(options));
if (hasFillStaleness())
result->setFillStaleness(getFillStaleness()->toAST(options));
return result;
}

View File

@ -105,6 +105,24 @@ public:
return children[fill_step_child_index];
}
/// Returns true if sort node has fill staleness, false otherwise
bool hasFillStaleness() const
{
return children[fill_staleness_child_index] != nullptr;
}
/// Get fill staleness
const QueryTreeNodePtr & getFillStaleness() const
{
return children[fill_staleness_child_index];
}
/// Get fill staleness
QueryTreeNodePtr & getFillStaleness()
{
return children[fill_staleness_child_index];
}
/// Get collator
const std::shared_ptr<Collator> & getCollator() const
{
@ -144,7 +162,8 @@ private:
static constexpr size_t fill_from_child_index = 1;
static constexpr size_t fill_to_child_index = 2;
static constexpr size_t fill_step_child_index = 3;
static constexpr size_t children_size = fill_step_child_index + 1;
static constexpr size_t fill_staleness_child_index = 4;
static constexpr size_t children_size = fill_staleness_child_index + 1;
SortDirection sort_direction = SortDirection::ASCENDING;
std::optional<SortDirection> nulls_sort_direction;

View File

@ -0,0 +1,30 @@
#include <Common/FieldVisitorScale.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
FieldVisitorScale::FieldVisitorScale(Int32 rhs_) : rhs(rhs_) {}
void FieldVisitorScale::operator() (Int64 & x) const { x *= rhs; }
void FieldVisitorScale::operator() (UInt64 & x) const { x *= rhs; }
void FieldVisitorScale::operator() (Float64 & x) const { x *= rhs; }
void FieldVisitorScale::operator() (Null &) const { /*Do not scale anything*/ }
void FieldVisitorScale::operator() (String &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale Strings"); }
void FieldVisitorScale::operator() (Array &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale Arrays"); }
void FieldVisitorScale::operator() (Tuple &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale Tuples"); }
void FieldVisitorScale::operator() (Map &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale Maps"); }
void FieldVisitorScale::operator() (Object &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale Objects"); }
void FieldVisitorScale::operator() (UUID &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale UUIDs"); }
void FieldVisitorScale::operator() (IPv4 &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale IPv4s"); }
void FieldVisitorScale::operator() (IPv6 &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale IPv6s"); }
void FieldVisitorScale::operator() (CustomType & x) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale custom type {}", x.getTypeName()); }
void FieldVisitorScale::operator() (AggregateFunctionStateData &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale AggregateFunctionStates"); }
void FieldVisitorScale::operator() (bool &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot scale Bools"); }
}

View File

@ -0,0 +1,43 @@
#pragma once
#include <Common/FieldVisitors.h>
#include <Common/FieldVisitorConvertToNumber.h>
namespace DB
{
/** Implements `*=` operation by number
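*
* Usage sketch, assuming the applyVisitor dispatch helper from Common/FieldVisitors.h:
*     Field x = Int64(5);
*     applyVisitor(FieldVisitorScale(3), x); /// x becomes Int64(15)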
*/
class FieldVisitorScale : public StaticVisitor<void>
{
private:
Int32 rhs;
public:
explicit FieldVisitorScale(Int32 rhs_);
void operator() (Int64 & x) const;
void operator() (UInt64 & x) const;
void operator() (Float64 & x) const;
void operator() (Null &) const;
[[noreturn]] void operator() (String &) const;
[[noreturn]] void operator() (Array &) const;
[[noreturn]] void operator() (Tuple &) const;
[[noreturn]] void operator() (Map &) const;
[[noreturn]] void operator() (Object &) const;
[[noreturn]] void operator() (UUID &) const;
[[noreturn]] void operator() (IPv4 &) const;
[[noreturn]] void operator() (IPv6 &) const;
[[noreturn]] void operator() (AggregateFunctionStateData &) const;
[[noreturn]] void operator() (CustomType &) const;
[[noreturn]] void operator() (bool &) const;
template <typename T>
void operator() (DecimalField<T> & x) const { x = DecimalField<T>(x.getValue() * T(rhs), x.getScale()); }
template <typename T>
requires is_big_int_v<T>
void operator() (T & x) const { x *= rhs; }
};
}

View File

@ -119,15 +119,4 @@ enum class JoinTableSide : uint8_t
const char * toString(JoinTableSide join_table_side);
/// Setting to choose which table to use as the inner table in hash join
enum class JoinInnerTableSelectionMode : uint8_t
{
/// Use left table
Left,
/// Use right table
Right,
/// Use the table with the smallest number of rows
Auto,
};
}

View File

@ -1912,9 +1912,6 @@ See also:
For single JOIN in case of identifier ambiguity prefer left table
)", IMPORTANT) \
\
DECLARE(JoinInnerTableSelectionMode, query_plan_join_inner_table_selection, JoinInnerTableSelectionMode::Auto, R"(
Select the side of the join to be the inner table in the query plan. Supported only for `ALL` join strictness with `JOIN ON` clause. Possible values: 'auto', 'left', 'right'.
)", 0) \
DECLARE(UInt64, preferred_block_size_bytes, 1000000, R"(
This setting adjusts the data block size for query processing and represents additional fine-tuning to the more rough 'max_block_size' setting. If the columns are large and with 'max_block_size' rows the block size is likely to be larger than the specified amount of bytes, its size will be lowered for better CPU cache locality.
)", 0) \
@ -4239,7 +4236,7 @@ Rewrite aggregate functions with if expression as argument when logically equiva
For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.
:::note
Supported only with experimental analyzer (`enable_analyzer = 1`).
Supported only with the analyzer (`enable_analyzer = 1`).
:::
)", 0) \
DECLARE(Bool, optimize_rewrite_array_exists_to_has, false, R"(

View File

@ -66,7 +66,6 @@ class WriteBuffer;
M(CLASS_NAME, IntervalOutputFormat) \
M(CLASS_NAME, JoinAlgorithm) \
M(CLASS_NAME, JoinStrictness) \
M(CLASS_NAME, JoinInnerTableSelectionMode) \
M(CLASS_NAME, LightweightMutationProjectionMode) \
M(CLASS_NAME, LoadBalancing) \
M(CLASS_NAME, LocalFSReadMethod) \

View File

@ -73,7 +73,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"backup_restore_keeper_max_retries_while_initializing", 0, 20, "New setting."},
{"backup_restore_keeper_max_retries_while_handling_error", 0, 20, "New setting."},
{"backup_restore_finish_timeout_after_error_sec", 0, 180, "New setting."},
{"query_plan_join_inner_table_selection", "auto", "auto", "New setting."},
{"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"},
}
},

View File

@ -55,10 +55,6 @@ IMPLEMENT_SETTING_MULTI_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
{"full_sorting_merge", JoinAlgorithm::FULL_SORTING_MERGE},
{"grace_hash", JoinAlgorithm::GRACE_HASH}})
IMPLEMENT_SETTING_ENUM(JoinInnerTableSelectionMode, ErrorCodes::BAD_ARGUMENTS,
{{"left", JoinInnerTableSelectionMode::Left},
{"right", JoinInnerTableSelectionMode::Right},
{"auto", JoinInnerTableSelectionMode::Auto}})
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,
{{"before_having", TotalsMode::BEFORE_HAVING},

View File

@ -128,8 +128,8 @@ constexpr auto getEnumValues();
DECLARE_SETTING_ENUM(LoadBalancing)
DECLARE_SETTING_ENUM(JoinStrictness)
DECLARE_SETTING_MULTI_ENUM(JoinAlgorithm)
DECLARE_SETTING_ENUM(JoinInnerTableSelectionMode)
/// Which rows should be included in TOTALS.

View File

@ -35,6 +35,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** Cursor allows comparing rows in different blocks (and parts).
* Cursor moves inside single block.
* It is used in priority queue.
@ -83,21 +88,27 @@ struct SortCursorImpl
SortCursorImpl(
const Block & header,
const Columns & columns,
size_t num_rows,
const SortDescription & desc_,
size_t order_ = 0,
IColumn::Permutation * perm = nullptr)
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
{
reset(columns, header, perm);
reset(columns, header, num_rows, perm);
}
bool empty() const { return rows == 0; }
/// Set the cursor to the beginning of the new block.
void reset(const Block & block, IColumn::Permutation * perm = nullptr) { reset(block.getColumns(), block, perm); }
void reset(const Block & block, IColumn::Permutation * perm = nullptr)
{
if (block.getColumns().empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column list in block");
reset(block.getColumns(), block, block.getColumns()[0]->size(), perm);
}
/// Set the cursor to the beginning of the new block.
void reset(const Columns & columns, const Block & block, IColumn::Permutation * perm = nullptr)
void reset(const Columns & columns, const Block & block, UInt64 num_rows, IColumn::Permutation * perm = nullptr)
{
all_columns.clear();
sort_columns.clear();
@ -125,7 +136,7 @@ struct SortCursorImpl
}
pos = 0;
rows = all_columns[0]->size();
rows = num_rows;
permutation = perm;
}

View File

@ -33,9 +33,12 @@ struct FillColumnDescription
DataTypePtr fill_to_type;
Field fill_step; /// Default = +1 or -1 according to direction
std::optional<IntervalKind> step_kind;
Field fill_staleness; /// Default = Null - should not be considered
std::optional<IntervalKind> staleness_kind;
using StepFunction = std::function<void(Field &)>;
using StepFunction = std::function<void(Field &, Int32 jumps_count)>;
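/// The Int32 argument lets the caller apply `jumps_count` steps at once
/// (used by the long-jump search in FillingRow::doLongJump).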
StepFunction step_func;
StepFunction staleness_step_func;
};
/// Description of the sorting rule by one column.

View File

@ -1,6 +1,9 @@
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeObjectDeprecated.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/Serializations/SerializationJSON.h>
#include <DataTypes/Serializations/SerializationObjectTypedPath.h>
#include <DataTypes/Serializations/SerializationObjectDynamicPath.h>
@ -522,6 +525,13 @@ static DataTypePtr createObject(const ASTPtr & arguments, const DataTypeObject::
return std::make_shared<DataTypeObject>(schema_format, std::move(typed_paths), std::move(paths_to_skip), std::move(path_regexps_to_skip), max_dynamic_paths, max_dynamic_types);
}
const DataTypePtr & DataTypeObject::getTypeOfSharedData()
{
/// Array(Tuple(String, String))
static const DataTypePtr type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(DataTypes{std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}, Names{"paths", "values"}));
return type;
}
static DataTypePtr createJSON(const ASTPtr & arguments)
{
auto context = CurrentThread::getQueryContext();

View File

@ -63,6 +63,9 @@ public:
size_t getMaxDynamicTypes() const { return max_dynamic_types; }
size_t getMaxDynamicPaths() const { return max_dynamic_paths; }
/// Shared data has type Array(Tuple(String, String)).
static const DataTypePtr & getTypeOfSharedData();
private:
SchemaFormat schema_format;
/// Set of paths with types that were specified in type declaration.

View File

@ -25,7 +25,7 @@ SerializationObject::SerializationObject(
: typed_path_serializations(std::move(typed_path_serializations_))
, paths_to_skip(paths_to_skip_)
, dynamic_serialization(std::make_shared<SerializationDynamic>())
, shared_data_serialization(getTypeOfSharedData()->getDefaultSerialization())
, shared_data_serialization(DataTypeObject::getTypeOfSharedData()->getDefaultSerialization())
{
/// We will need sorted order of typed paths to serialize them in order for consistency.
sorted_typed_paths.reserve(typed_path_serializations.size());
@ -38,13 +38,6 @@ SerializationObject::SerializationObject(
path_regexps_to_skip.emplace_back(regexp_str);
}
const DataTypePtr & SerializationObject::getTypeOfSharedData()
{
/// Array(Tuple(String, String))
static const DataTypePtr type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(DataTypes{std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}, Names{"paths", "values"}));
return type;
}
bool SerializationObject::shouldSkipPath(const String & path) const
{
if (paths_to_skip.contains(path))
@ -168,7 +161,7 @@ void SerializationObject::enumerateStreams(EnumerateStreamsSettings & settings,
settings.path.push_back(Substream::ObjectSharedData);
auto shared_data_substream_data = SubstreamData(shared_data_serialization)
.withType(getTypeOfSharedData())
.withType(DataTypeObject::getTypeOfSharedData())
.withColumn(column_object ? column_object->getSharedDataPtr() : nullptr)
.withSerializationInfo(data.serialization_info)
.withDeserializeState(deserialize_state ? deserialize_state->shared_data_state : nullptr);

View File

@ -111,9 +111,6 @@ private:
DeserializeBinaryBulkSettings & settings,
SubstreamsDeserializeStatesCache * cache);
/// Shared data has type Array(Tuple(String, String)).
static const DataTypePtr & getTypeOfSharedData();
struct TypedPathSubcolumnCreator : public ISubcolumnCreator
{
String path;

View File

@ -18,7 +18,7 @@ SerializationObjectDynamicPath::SerializationObjectDynamicPath(
, path(path_)
, path_subcolumn(path_subcolumn_)
, dynamic_serialization(std::make_shared<SerializationDynamic>())
, shared_data_serialization(SerializationObject::getTypeOfSharedData()->getDefaultSerialization())
, shared_data_serialization(DataTypeObject::getTypeOfSharedData()->getDefaultSerialization())
, max_dynamic_types(max_dynamic_types_)
{
}
@ -67,8 +67,8 @@ void SerializationObjectDynamicPath::enumerateStreams(
{
settings.path.push_back(Substream::ObjectSharedData);
auto shared_data_substream_data = SubstreamData(shared_data_serialization)
.withType(data.type ? SerializationObject::getTypeOfSharedData() : nullptr)
.withColumn(data.column ? SerializationObject::getTypeOfSharedData()->createColumn() : nullptr)
.withType(data.type ? DataTypeObject::getTypeOfSharedData() : nullptr)
.withColumn(data.column ? DataTypeObject::getTypeOfSharedData()->createColumn() : nullptr)
.withSerializationInfo(data.serialization_info)
.withDeserializeState(deserialize_state->nested_state);
settings.path.back().data = shared_data_substream_data;
@ -164,7 +164,7 @@ void SerializationObjectDynamicPath::deserializeBinaryBulkWithMultipleStreams(
settings.path.push_back(Substream::ObjectSharedData);
/// Initialize shared_data column if needed.
if (result_column->empty())
dynamic_path_state->shared_data = SerializationObject::getTypeOfSharedData()->createColumn();
dynamic_path_state->shared_data = DataTypeObject::getTypeOfSharedData()->createColumn();
size_t prev_size = result_column->size();
shared_data_serialization->deserializeBinaryBulkWithMultipleStreams(dynamic_path_state->shared_data, limit, settings, dynamic_path_state->nested_state, cache);
/// If we need to read a subcolumn from Dynamic column, create an empty Dynamic column, fill it and extract subcolumn.

View File

@ -17,7 +17,7 @@ SerializationSubObject::SerializationSubObject(
: path_prefix(path_prefix_)
, typed_paths_serializations(typed_paths_serializations_)
, dynamic_serialization(std::make_shared<SerializationDynamic>())
, shared_data_serialization(SerializationObject::getTypeOfSharedData()->getDefaultSerialization())
, shared_data_serialization(DataTypeObject::getTypeOfSharedData()->getDefaultSerialization())
{
}
@ -64,8 +64,8 @@ void SerializationSubObject::enumerateStreams(
/// We will need to read shared data to find all paths with requested prefix.
settings.path.push_back(Substream::ObjectSharedData);
auto shared_data_substream_data = SubstreamData(shared_data_serialization)
.withType(data.type ? SerializationObject::getTypeOfSharedData() : nullptr)
.withColumn(data.column ? SerializationObject::getTypeOfSharedData()->createColumn() : nullptr)
.withType(data.type ? DataTypeObject::getTypeOfSharedData() : nullptr)
.withColumn(data.column ? DataTypeObject::getTypeOfSharedData()->createColumn() : nullptr)
.withSerializationInfo(data.serialization_info)
.withDeserializeState(deserialize_state ? deserialize_state->shared_data_state : nullptr);
settings.path.back().data = shared_data_substream_data;
@ -208,7 +208,7 @@ void SerializationSubObject::deserializeBinaryBulkWithMultipleStreams(
settings.path.push_back(Substream::ObjectSharedData);
/// If it's a new object column, reinitialize column for shared data.
if (result_column->empty())
sub_object_state->shared_data = SerializationObject::getTypeOfSharedData()->createColumn();
sub_object_state->shared_data = DataTypeObject::getTypeOfSharedData()->createColumn();
size_t prev_size = column_object.size();
shared_data_serialization->deserializeBinaryBulkWithMultipleStreams(sub_object_state->shared_data, limit, settings, sub_object_state->shared_data_state, cache);
settings.path.pop_back();

View File

@ -307,6 +307,13 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
if (!columns.empty())
columns_part = fmt::format(" AND attname IN ('{}')", boost::algorithm::join(columns, "','"));
/// Bypass the error about the missing column `attgenerated` in the system table `pg_attribute` for PostgreSQL versions below 12.
/// The trick is to execute a preliminary query against the DBMS to obtain the correct expression for the line with comment /// if column has GENERATED.
/// The result of that query is either the column name `attgenerated` or an empty string literal for PostgreSQL 11 and below.
/// This does not degrade the function's performance, restores support for older versions, and fixes ERROR: column "attgenerated" does not exist.
pqxx::result gen_result{tx.exec("select case when current_setting('server_version_num')::int < 120000 then '''''' else 'attgenerated' end as generated")};
std::string generated = gen_result[0][0].as<std::string>();
std::string query = fmt::format(
"SELECT attname AS name, " /// column name
"format_type(atttypid, atttypmod) AS type, " /// data type
@ -315,11 +322,11 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
"atttypid as type_id, "
"atttypmod as type_modifier, "
"attnum as att_num, "
"attgenerated as generated " /// if column has GENERATED
"{} as generated " /// if column has GENERATED
"FROM pg_attribute "
"WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) {}"
"AND NOT attisdropped AND attnum > 0 "
"ORDER BY attnum ASC", where, columns_part);
"ORDER BY attnum ASC", generated, where, columns_part); /// Now we use variable `generated` to form query string. End of trick.
auto postgres_table_with_schema = postgres_schema.empty() ? postgres_table : doubleQuoteString(postgres_schema) + '.' + doubleQuoteString(postgres_table);
table.physical_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, false);

View File

@ -3921,7 +3921,7 @@ private:
}
}
WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const
WrapperType createTupleToObjectDeprecatedWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const
{
if (!from_tuple.haveExplicitNames())
throw Exception(ErrorCodes::TYPE_MISMATCH,
@ -3968,7 +3968,7 @@ private:
};
}
WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const
WrapperType createMapToObjectDeprecatedWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const
{
auto key_value_types = from_map.getKeyValueTypes();
@ -4048,11 +4048,11 @@ private:
{
if (const auto * from_tuple = checkAndGetDataType<DataTypeTuple>(from_type.get()))
{
return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns());
return createTupleToObjectDeprecatedWrapper(*from_tuple, to_type->hasNullableSubcolumns());
}
else if (const auto * from_map = checkAndGetDataType<DataTypeMap>(from_type.get()))
{
return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns());
return createMapToObjectDeprecatedWrapper(*from_map, to_type->hasNullableSubcolumns());
}
else if (checkAndGetDataType<DataTypeString>(from_type.get()))
{
@ -4081,23 +4081,43 @@ private:
"Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName());
}
WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_object) const
{
if (checkAndGetDataType<DataTypeString>(from_type.get()))
{
return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count)
{
auto res = ConvertImplGenericFromString<true>::execute(arguments, result_type, nullable_source, input_rows_count, context)->assumeMutable();
res->finalize();
return res;
return ConvertImplGenericFromString<true>::execute(arguments, result_type, nullable_source, input_rows_count, context);
};
}
/// Cast Tuple/Object/Map to JSON type through serializing into JSON string and parsing back into JSON column.
/// Potentially we can do smarter conversion Tuple -> JSON with type preservation, but it's questionable how exactly Tuple should be
/// converted to JSON (for example, should we recursively convert nested Array(Tuple) to Array(JSON) or not, should we infer types from String fields, etc).
if (checkAndGetDataType<DataTypeObjectDeprecated>(from_type.get()) || checkAndGetDataType<DataTypeTuple>(from_type.get()) || checkAndGetDataType<DataTypeMap>(from_type.get()))
{
return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count)
{
auto json_string = ColumnString::create();
ColumnStringHelpers::WriteHelper write_helper(assert_cast<ColumnString &>(*json_string), input_rows_count);
auto & write_buffer = write_helper.getWriteBuffer();
FormatSettings format_settings = context ? getFormatSettings(context) : FormatSettings{};
auto serialization = arguments[0].type->getDefaultSerialization();
for (size_t i = 0; i < input_rows_count; ++i)
{
serialization->serializeTextJSON(*arguments[0].column, i, write_buffer, format_settings);
write_helper.rowWritten();
}
write_helper.finalize();
ColumnsWithTypeAndName args_with_json_string = {ColumnWithTypeAndName(json_string->getPtr(), std::make_shared<DataTypeString>(), "")};
return ConvertImplGenericFromString<true>::execute(args_with_json_string, result_type, nullable_source, input_rows_count, context);
};
}
/// TODO: support CAST between JSON types with different parameters
/// support CAST from Map to JSON
/// support CAST from Tuple to JSON
/// support CAST from Object('json') to JSON
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cast to {} can be performed only from String. Got: {}", magic_enum::enum_name(to_object->getSchemaFormat()), from_type->getName());
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cast to {} can be performed only from String/Map/Object/Tuple. Got: {}", magic_enum::enum_name(to_object->getSchemaFormat()), from_type->getName());
}
WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const

View File

@ -24,92 +24,7 @@ namespace ErrorCodes
void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast)
{
if (!ast)
{
chassert(false);
return;
}
/// FIXME: this helper should use updatePointerToChild(), but
/// forEachPointerToChild() is not implemented for ASTColumnDeclaration
/// (and also some members should be adjusted for this).
const auto visit_child_with_shared_ptr = [&](ASTPtr & child)
{
if (!child)
return;
auto * old_value = child.get();
visit(child);
// child did not change
if (old_value == child.get())
return;
// child changed, we need to modify it in the list of children of the parent also
for (auto & current_child : ast->children)
{
if (current_child.get() == old_value)
current_child = child;
}
};
if (auto * col_decl = ast->as<ASTColumnDeclaration>())
{
visit_child_with_shared_ptr(col_decl->default_expression);
visit_child_with_shared_ptr(col_decl->ttl);
return;
}
if (auto * storage = ast->as<ASTStorage>())
{
const auto visit_child = [&](IAST * & child)
{
if (!child)
return;
if (const auto * function = child->template as<ASTFunction>())
{
std::unordered_set<std::string> udf_in_replace_process;
auto replace_result = tryToReplaceFunction(*function, udf_in_replace_process);
if (replace_result)
ast->setOrReplace(child, replace_result);
}
visit(child);
};
visit_child(storage->partition_by);
visit_child(storage->primary_key);
visit_child(storage->order_by);
visit_child(storage->sample_by);
visit_child(storage->ttl_table);
return;
}
if (auto * alter = ast->as<ASTAlterCommand>())
{
/// It is OK to use updatePointerToChild() because ASTAlterCommand implements forEachPointerToChild()
const auto visit_child_update_parent = [&](ASTPtr & child)
{
if (!child)
return;
auto * old_ptr = child.get();
visit(child);
auto * new_ptr = child.get();
/// Some AST classes have naked pointers to children elements as members.
/// We have to replace them if the child was replaced.
if (new_ptr != old_ptr)
ast->updatePointerToChild(old_ptr, new_ptr);
};
for (auto & children : alter->children)
visit_child_update_parent(children);
return;
}
chassert(ast);
if (const auto * function = ast->template as<ASTFunction>())
{
@ -120,7 +35,19 @@ void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast)
}
for (auto & child : ast->children)
{
if (!child)
return;
auto * old_ptr = child.get();
visit(child);
auto * new_ptr = child.get();
/// Some AST classes have naked pointers to children elements as members.
/// We have to replace them if the child was replaced.
if (new_ptr != old_ptr)
ast->updatePointerToChild(old_ptr, new_ptr);
}
}
void UserDefinedSQLFunctionVisitor::visit(IAST * ast)

View File

@ -60,17 +60,6 @@ public:
IBlocksStreamPtr
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
bool isCloneSupported() const override
{
return !getTotals() && getTotalRowCount() == 0;
}
std::shared_ptr<IJoin> clone(const std::shared_ptr<TableJoin> & table_join_, const Block &, const Block & right_sample_block_) const override
{
return std::make_shared<ConcurrentHashJoin>(context, table_join_, slots, right_sample_block_, stats_collecting_params);
}
private:
struct InternalHashJoin
{

View File

@ -1,11 +1,24 @@
#include <Interpreters/FillingRow.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <cstddef>
#include <IO/Operators.h>
#include <Common/Logger.h>
#include <Common/logger_useful.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Interpreters/FillingRow.h>
namespace DB
{
constexpr static bool debug_logging_enabled = false;
template <class... Args>
inline static void logDebug(const char * fmt_str, Args&&... args)
{
if constexpr (debug_logging_enabled)
LOG_DEBUG(getLogger("FillingRow"), "{}", fmt::format(fmt::runtime(fmt_str), std::forward<Args>(args)...));
}
bool less(const Field & lhs, const Field & rhs, int direction)
{
if (direction == -1)
@ -28,6 +41,10 @@ FillingRow::FillingRow(const SortDescription & sort_description_)
: sort_description(sort_description_)
{
row.resize(sort_description.size());
constraints.reserve(sort_description.size());
for (size_t i = 0; i < size(); ++i)
constraints.push_back(getFillDescription(i).fill_to);
}
bool FillingRow::operator<(const FillingRow & other) const
@ -63,71 +80,254 @@ bool FillingRow::isNull() const
return true;
}
std::pair<bool, bool> FillingRow::next(const FillingRow & to_row)
std::optional<Field> FillingRow::doLongJump(const FillColumnDescription & descr, size_t column_ind, const Field & to)
{
Field shifted_value = row[column_ind];
if (less(to, shifted_value, getDirection(column_ind)))
return std::nullopt;
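/// Exponential search: grow the step while the jump stays within `to`,
/// halve it after an overshoot; `shifted_value` converges to the last
/// reachable value not beyond `to` (at most 100 iterations).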
for (int32_t step_len = 1, step_no = 0; step_no < 100 && step_len > 0; ++step_no)
{
Field next_value = shifted_value;
descr.step_func(next_value, step_len);
if (less(to, next_value, getDirection(0)))
{
step_len /= 2;
}
else
{
shifted_value = std::move(next_value);
step_len *= 2;
}
}
return shifted_value;
}
bool FillingRow::hasSomeConstraints(size_t pos) const
{
return !constraints[pos].isNull();
}
bool FillingRow::isConstraintsSatisfied(size_t pos) const
{
chassert(!row[pos].isNull());
chassert(hasSomeConstraints(pos));
int direction = getDirection(pos);
logDebug("constraint: {}, row: {}, direction: {}", constraints[pos], row[pos], direction);
return less(row[pos], constraints[pos], direction);
}
static const Field & findBorder(const Field & constraint, const Field & next_original, int direction)
{
if (constraint.isNull())
return next_original;
if (next_original.isNull())
return constraint;
if (less(constraint, next_original, direction))
return constraint;
return next_original;
}
bool FillingRow::next(const FillingRow & next_original_row, bool& value_changed)
{
const size_t row_size = size();
size_t pos = 0;
/// Find position we need to increment for generating next row.
for (; pos < row_size; ++pos)
if (!row[pos].isNull() && !to_row.row[pos].isNull() && !equals(row[pos], to_row.row[pos]))
{
if (row[pos].isNull())
continue;
const Field & border = findBorder(constraints[pos], next_original_row[pos], getDirection(pos));
logDebug("border: {}", border);
if (!border.isNull() && !equals(row[pos], border))
break;
}
if (pos == row_size || less(to_row.row[pos], row[pos], getDirection(pos)))
return {false, false};
logDebug("pos: {}", pos);
/// If we have any 'fill_to' value at position greater than 'pos',
/// we need to generate rows up to 'fill_to' value.
if (pos == row_size)
return false;
if (!next_original_row[pos].isNull() && less(next_original_row[pos], row[pos], getDirection(pos)))
return false;
if (!constraints[pos].isNull() && !less(row[pos], constraints[pos], getDirection(pos)))
return false;
/// If we have any 'fill_to' value at position greater than 'pos' or configured staleness,
/// we need to generate rows up to one of these borders.
for (size_t i = row_size - 1; i > pos; --i)
{
auto & fill_column_desc = getFillDescription(i);
if (fill_column_desc.fill_to.isNull() || row[i].isNull())
if (row[i].isNull())
continue;
if (constraints[i].isNull())
continue;
Field next_value = row[i];
fill_column_desc.step_func(next_value);
if (less(next_value, fill_column_desc.fill_to, getDirection(i)))
{
row[i] = next_value;
initFromDefaults(i + 1);
return {true, true};
}
fill_column_desc.step_func(next_value, 1);
if (!less(next_value, constraints[i], getDirection(i)))
continue;
row[i] = next_value;
initUsingFrom(i + 1);
value_changed = true;
return true;
}
auto next_value = row[pos];
getFillDescription(pos).step_func(next_value);
getFillDescription(pos).step_func(next_value, 1);
if (less(to_row.row[pos], next_value, getDirection(pos)) || equals(next_value, getFillDescription(pos).fill_to))
return {false, false};
if (!next_original_row[pos].isNull() && less(next_original_row[pos], next_value, getDirection(pos)))
return false;
if (!constraints[pos].isNull() && !less(next_value, constraints[pos], getDirection(pos)))
return false;
row[pos] = next_value;
if (equals(row[pos], to_row.row[pos]))
if (equals(row[pos], next_original_row[pos]))
{
bool is_less = false;
for (size_t i = pos + 1; i < row_size; ++i)
{
const auto & fill_from = getFillDescription(i).fill_from;
if (!fill_from.isNull())
row[i] = fill_from;
const auto & descr = getFillDescription(i);
if (!descr.fill_from.isNull())
row[i] = descr.fill_from;
else
row[i] = to_row.row[i];
is_less |= less(row[i], to_row.row[i], getDirection(i));
row[i] = next_original_row[i];
is_less |= (
(next_original_row[i].isNull() || less(row[i], next_original_row[i], getDirection(i))) &&
(constraints[i].isNull() || less(row[i], constraints[i], getDirection(i)))
);
}
return {is_less, true};
value_changed = true;
return is_less;
}
initFromDefaults(pos + 1);
return {true, true};
initUsingFrom(pos + 1);
value_changed = true;
return true;
}
void FillingRow::initFromDefaults(size_t from_pos)
bool FillingRow::shift(const FillingRow & next_original_row, bool& value_changed)
{
logDebug("next_original_row: {}, current: {}", next_original_row, *this);
for (size_t pos = 0; pos < size(); ++pos)
{
if (row[pos].isNull() || next_original_row[pos].isNull() || equals(row[pos], next_original_row[pos]))
continue;
if (less(next_original_row[pos], row[pos], getDirection(pos)))
return false;
std::optional<Field> next_value = doLongJump(getFillDescription(pos), pos, next_original_row[pos]);
logDebug("jumped to next value: {}", next_value.value_or("Did not complete"));
row[pos] = std::move(next_value.value());
if (equals(row[pos], next_original_row[pos]))
{
bool is_less = false;
for (size_t i = pos + 1; i < size(); ++i)
{
const auto & descr = getFillDescription(i);
if (!descr.fill_from.isNull())
row[i] = descr.fill_from;
else
row[i] = next_original_row[i];
is_less |= (
(next_original_row[i].isNull() || less(row[i], next_original_row[i], getDirection(i))) &&
(constraints[i].isNull() || less(row[i], constraints[i], getDirection(i)))
);
}
logDebug("is less: {}", is_less);
value_changed = true;
return is_less;
}
else
{
initUsingTo(/*from_pos=*/pos + 1);
value_changed = false;
return false;
}
}
return false;
}
bool FillingRow::hasSomeConstraints() const
{
for (size_t pos = 0; pos < size(); ++pos)
if (hasSomeConstraints(pos))
return true;
return false;
}
bool FillingRow::isConstraintsSatisfied() const
{
for (size_t pos = 0; pos < size(); ++pos)
{
if (row[pos].isNull() || !hasSomeConstraints(pos))
continue;
return isConstraintsSatisfied(pos);
}
return true;
}
void FillingRow::initUsingFrom(size_t from_pos)
{
for (size_t i = from_pos; i < sort_description.size(); ++i)
row[i] = getFillDescription(i).fill_from;
}
void FillingRow::initUsingTo(size_t from_pos)
{
for (size_t i = from_pos; i < sort_description.size(); ++i)
row[i] = getFillDescription(i).fill_to;
}
void FillingRow::updateConstraintsWithStalenessRow(const Columns& base_row, size_t row_ind)
{
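/// A column's effective constraint is whichever border comes first in the fill
/// direction: `fill_to` or the original row value shifted by the staleness step.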
for (size_t i = 0; i < size(); ++i)
{
const auto& descr = getFillDescription(i);
if (!descr.fill_staleness.isNull())
{
Field staleness_border = (*base_row[i])[row_ind];
descr.staleness_step_func(staleness_border, 1);
constraints[i] = findBorder(descr.fill_to, staleness_border, getDirection(i));
}
}
}
String FillingRow::dump() const
{
WriteBufferFromOwnString out;
@ -147,3 +347,12 @@ WriteBuffer & operator<<(WriteBuffer & out, const FillingRow & row)
}
}
template <>
struct fmt::formatter<DB::FillingRow> : fmt::formatter<string_view>
{
constexpr auto format(const DB::FillingRow & row, format_context & ctx) const
{
return fmt::format_to(ctx.out(), "{}", row.dump());
}
};

View File

@ -1,6 +1,6 @@
#pragma once
#include <Core/SortDescription.h>
#include <Core/SortDescription.h>
namespace DB
{
@ -15,16 +15,28 @@ bool equals(const Field & lhs, const Field & rhs);
*/
class FillingRow
{
/// Finds the last value <= `to` reachable by whole steps from the current row value
std::optional<Field> doLongJump(const FillColumnDescription & descr, size_t column_ind, const Field & to);
bool hasSomeConstraints(size_t pos) const;
bool isConstraintsSatisfied(size_t pos) const;
public:
explicit FillingRow(const SortDescription & sort_description);
/// Generates next row according to fill 'from', 'to' and 'step' values.
/// Return pair of boolean
/// apply - true if filling values should be inserted into result set
/// value_changed - true if filling row value was changed
std::pair<bool, bool> next(const FillingRow & to_row);
/// Returns true if filling values should be inserted into result set
bool next(const FillingRow & next_original_row, bool& value_changed);
void initFromDefaults(size_t from_pos = 0);
/// Returns true if we need to generate some prefix for next_original_row
bool shift(const FillingRow & next_original_row, bool& value_changed);
bool hasSomeConstraints() const;
bool isConstraintsSatisfied() const;
void initUsingFrom(size_t from_pos = 0);
void initUsingTo(size_t from_pos = 0);
void updateConstraintsWithStalenessRow(const Columns& base_row, size_t row_ind);
Field & operator[](size_t index) { return row[index]; }
const Field & operator[](size_t index) const { return row[index]; }
@ -42,6 +54,7 @@ public:
private:
Row row;
Row constraints;
SortDescription sort_description;
};

View File

@ -36,7 +36,7 @@ public:
bool isCloneSupported() const override
{
return !getTotals();
return true;
}
std::shared_ptr<IJoin> clone(const std::shared_ptr<TableJoin> & table_join_,

View File

@ -383,16 +383,6 @@ size_t HashJoin::getTotalByteCount() const
return res;
}
bool HashJoin::isUsedByAnotherAlgorithm() const
{
return table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) || table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH);
}
bool HashJoin::canRemoveColumnsFromLeftBlock() const
{
return table_join->enableEnalyzer() && !table_join->hasUsing() && !isUsedByAnotherAlgorithm();
}
void HashJoin::initRightBlockStructure(Block & saved_block_sample)
{
if (isCrossOrComma(kind))
@ -404,7 +394,8 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample)
bool multiple_disjuncts = !table_join->oneDisjunct();
/// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any).
bool save_key_columns = isUsedByAnotherAlgorithm() ||
bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) ||
table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) ||
isRightOrFull(kind) ||
multiple_disjuncts ||
table_join->getMixedJoinExpression();
@ -1237,10 +1228,7 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block,
{
if (!JoinCommon::hasNonJoinedBlocks(*table_join))
return {};
size_t left_columns_count = left_sample_block.columns();
if (canRemoveColumnsFromLeftBlock())
left_columns_count = table_join->getOutputColumns(JoinTableSide::Left).size();
bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join);
if (!flag_per_row)

View File

@ -127,7 +127,7 @@ public:
bool isCloneSupported() const override
{
return !getTotals() && getTotalRowCount() == 0;
return true;
}
std::shared_ptr<IJoin> clone(const std::shared_ptr<TableJoin> & table_join_,
@ -464,9 +464,6 @@ private:
bool empty() const;
bool isUsedByAnotherAlgorithm() const;
bool canRemoveColumnsFromLeftBlock() const;
void validateAdditionalFilterExpression(std::shared_ptr<ExpressionActions> additional_filter_expression);
bool needUsedFlagsForPerRightTableRow(std::shared_ptr<TableJoin> table_join_) const;

View File

@ -56,6 +56,7 @@ Block HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinBlockImpl(
const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right;
join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]);
}
size_t existing_columns = block.columns();
/** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
* Because if they are constants, then in the "not joined" rows, they may have different values
@ -98,22 +99,6 @@ Block HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinBlockImpl(
added_columns.buildJoinGetOutput();
else
added_columns.buildOutput();
const auto & table_join = join.table_join;
std::set<size_t> block_columns_to_erase;
if (join.canRemoveColumnsFromLeftBlock())
{
std::unordered_set<String> left_output_columns;
for (const auto & out_column : table_join->getOutputColumns(JoinTableSide::Left))
left_output_columns.insert(out_column.name);
for (size_t i = 0; i < block.columns(); ++i)
{
if (!left_output_columns.contains(block.getByPosition(i).name))
block_columns_to_erase.insert(i);
}
}
size_t existing_columns = block.columns();
for (size_t i = 0; i < added_columns.size(); ++i)
block.insert(added_columns.moveColumn(i));
@ -175,7 +160,6 @@ Block HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinBlockImpl(
block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate);
}
}
block.erase(block_columns_to_erase);
return remaining_block;
}
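
The materialization requirement described in the comment above can be illustrated with a toy column type (a simplified sketch; ClickHouse's IColumn interface is far richer):

// Toy demonstration of why constant left-side columns must be materialized
// for FULL/RIGHT JOIN: a constant column claims one value for every row, but
// the "not joined" rows produced by the right side need defaults.
#include <cassert>
#include <vector>

struct ConstColumn { int value; size_t rows; };   // one value logically repeated

// Expand into a full column so individual rows can diverge afterwards.
std::vector<int> materialize(const ConstColumn & c) { return std::vector<int>(c.rows, c.value); }

int main()
{
    ConstColumn left{42, 3};
    auto full = materialize(left);
    full.push_back(0);   // default value for a right-side row with no left match
    assert(full.size() == 4 && full.back() == 0);
}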

View File

@ -1888,9 +1888,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
expressions.join,
settings[Setting::max_block_size],
max_streams,
/* required_output_ = */ NameSet{},
analysis_result.optimize_read_in_order,
/* use_new_analyzer_ = */ false);
analysis_result.optimize_read_in_order);
join_step->setStepDescription(fmt::format("JOIN {}", expressions.join->pipelineType()));
std::vector<QueryPlanPtr> plans;

View File

@ -41,7 +41,6 @@ namespace DB
namespace Setting
{
extern const SettingsBool allow_experimental_join_right_table_sorting;
extern const SettingsBool allow_experimental_analyzer;
extern const SettingsUInt64 cross_join_min_bytes_to_compress;
extern const SettingsUInt64 cross_join_min_rows_to_compress;
extern const SettingsUInt64 default_max_bytes_in_join;
@ -144,7 +143,6 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, Temporary
, max_memory_usage(settings[Setting::max_memory_usage])
, tmp_volume(tmp_volume_)
, tmp_data(tmp_data_)
, enable_analyzer(settings[Setting::allow_experimental_analyzer])
{
}
@ -163,8 +161,6 @@ void TableJoin::resetCollected()
clauses.clear();
columns_from_joined_table.clear();
columns_added_by_join.clear();
columns_from_left_table.clear();
result_columns_from_left_table.clear();
original_names.clear();
renames.clear();
left_type_map.clear();
@ -207,20 +203,6 @@ size_t TableJoin::rightKeyInclusion(const String & name) const
return count;
}
void TableJoin::setInputColumns(NamesAndTypesList left_output_columns, NamesAndTypesList right_output_columns)
{
columns_from_left_table = std::move(left_output_columns);
columns_from_joined_table = std::move(right_output_columns);
}
const NamesAndTypesList & TableJoin::getOutputColumns(JoinTableSide side)
{
if (side == JoinTableSide::Left)
return result_columns_from_left_table;
return columns_added_by_join;
}
void TableJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix)
{
NameSet joined_columns;
@ -369,18 +351,9 @@ bool TableJoin::rightBecomeNullable(const DataTypePtr & column_type) const
return forceNullableRight() && JoinCommon::canBecomeNullable(column_type);
}
void TableJoin::setUsedColumn(const NameAndTypePair & joined_column, JoinTableSide side)
{
if (side == JoinTableSide::Left)
result_columns_from_left_table.push_back(joined_column);
else
columns_added_by_join.push_back(joined_column);
}
void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
{
setUsedColumn(joined_column, JoinTableSide::Right);
columns_added_by_join.emplace_back(joined_column);
}
NamesAndTypesList TableJoin::correctedColumnsAddedByJoin() const
@ -1022,32 +995,5 @@ size_t TableJoin::getMaxMemoryUsage() const
return max_memory_usage;
}
void TableJoin::swapSides()
{
assertEnableEnalyzer();
std::swap(key_asts_left, key_asts_right);
std::swap(left_type_map, right_type_map);
for (auto & clause : clauses)
{
std::swap(clause.key_names_left, clause.key_names_right);
std::swap(clause.on_filter_condition_left, clause.on_filter_condition_right);
std::swap(clause.analyzer_left_filter_condition_column_name, clause.analyzer_right_filter_condition_column_name);
}
std::swap(columns_from_left_table, columns_from_joined_table);
std::swap(result_columns_from_left_table, columns_added_by_join);
if (table_join.kind == JoinKind::Left)
table_join.kind = JoinKind::Right;
else if (table_join.kind == JoinKind::Right)
table_join.kind = JoinKind::Left;
}
void TableJoin::assertEnableEnalyzer() const
{
if (!enable_analyzer)
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "TableJoin: analyzer is disabled");
}
}

View File

@ -167,9 +167,6 @@ private:
ASOFJoinInequality asof_inequality = ASOFJoinInequality::GreaterOrEquals;
NamesAndTypesList columns_from_left_table;
NamesAndTypesList result_columns_from_left_table;
/// All columns which can be read from joined table. Duplicating names are qualified.
NamesAndTypesList columns_from_joined_table;
/// Columns will be added to block by JOIN.
@ -205,8 +202,6 @@ private:
bool is_join_with_constant = false;
bool enable_analyzer = false;
Names requiredJoinedNames() const;
/// Create converting actions and change key column names if required
@ -271,8 +266,6 @@ public:
VolumePtr getGlobalTemporaryVolume() { return tmp_volume; }
TemporaryDataOnDiskScopePtr getTempDataOnDisk() { return tmp_data; }
bool enableEnalyzer() const { return enable_analyzer; }
void assertEnableEnalyzer() const;
ActionsDAG createJoinedBlockActions(ContextPtr context) const;
@ -289,7 +282,6 @@ public:
}
bool allowParallelHashJoin() const;
void swapSides();
bool joinUseNulls() const { return join_use_nulls; }
@ -380,9 +372,6 @@ public:
bool leftBecomeNullable(const DataTypePtr & column_type) const;
bool rightBecomeNullable(const DataTypePtr & column_type) const;
void addJoinedColumn(const NameAndTypePair & joined_column);
void setUsedColumn(const NameAndTypePair & joined_column, JoinTableSide side);
void setColumnsAddedByJoin(const NamesAndTypesList & columns_added_by_join_value)
{
columns_added_by_join = columns_added_by_join_value;
@ -408,17 +397,11 @@ public:
ASTPtr leftKeysList() const;
ASTPtr rightKeysList() const; /// For ON syntax only
void setColumnsFromJoinedTable(NamesAndTypesList columns_from_joined_table_value, const NameSet & left_table_columns, const String & right_table_prefix, const NamesAndTypesList & columns_from_left_table_)
void setColumnsFromJoinedTable(NamesAndTypesList columns_from_joined_table_value, const NameSet & left_table_columns, const String & right_table_prefix)
{
columns_from_joined_table = std::move(columns_from_joined_table_value);
deduplicateAndQualifyColumnNames(left_table_columns, right_table_prefix);
result_columns_from_left_table = columns_from_left_table_;
columns_from_left_table = columns_from_left_table_;
}
void setInputColumns(NamesAndTypesList left_output_columns, NamesAndTypesList right_output_columns);
const NamesAndTypesList & getOutputColumns(JoinTableSide side);
const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; }
const NamesAndTypesList & columnsAddedByJoin() const { return columns_added_by_join; }

View File

@ -1353,15 +1353,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
if (tables_with_columns.size() > 1)
{
auto columns_from_left_table = tables_with_columns[0].columns;
const auto & right_table = tables_with_columns[1];
auto columns_from_joined_table = right_table.columns;
/// The query can use materialized or aliased columns from the right joined table,
/// so we want to request them from the right table
columns_from_joined_table.insert(columns_from_joined_table.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end());
columns_from_left_table.insert(columns_from_left_table.end(), tables_with_columns[0].hidden_columns.begin(), tables_with_columns[0].hidden_columns.end());
result.analyzed_join->setColumnsFromJoinedTable(
std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix(), columns_from_left_table);
result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix());
}
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);

View File

@ -128,4 +128,14 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo
}
}
void ASTColumnDeclaration::forEachPointerToChild(std::function<void(void **)> f)
{
f(reinterpret_cast<void **>(&default_expression));
f(reinterpret_cast<void **>(&comment));
f(reinterpret_cast<void **>(&codec));
f(reinterpret_cast<void **>(&statistics_desc));
f(reinterpret_cast<void **>(&ttl));
f(reinterpret_cast<void **>(&collation));
f(reinterpret_cast<void **>(&settings));
}
}
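
The override registers every child slot with a generic visitor. A toy standalone sketch of the same pattern (int * children instead of the ASTPtr members used here; a hypothetical simplification):

#include <functional>
#include <iostream>

struct Node
{
    int * left = nullptr;
    int * right = nullptr;

    // Expose raw member-pointer slots so tree rewrites can update them in place.
    void forEachPointerToChild(std::function<void(void **)> f)
    {
        f(reinterpret_cast<void **>(&left));
        f(reinterpret_cast<void **>(&right));
    }
};

int main()
{
    int a = 1, b = 2;
    Node n{&a, &b};
    int count = 0;
    n.forEachPointerToChild([&](void ** child) { if (*child) ++count; });
    std::cout << count << " child slots set\n";   // prints: 2 child slots set
}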

View File

@ -29,6 +29,9 @@ public:
ASTPtr clone() const override;
void formatImpl(const FormatSettings & format_settings, FormatState & state, FormatStateStacked frame) const override;
protected:
void forEachPointerToChild(std::function<void(void **)> f) override;
};
}

View File

@ -54,6 +54,11 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState
settings.ostr << (settings.hilite ? hilite_keyword : "") << " STEP " << (settings.hilite ? hilite_none : "");
fill_step->formatImpl(settings, state, frame);
}
if (auto fill_staleness = getFillStaleness())
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " STALENESS " << (settings.hilite ? hilite_none : "");
fill_staleness->formatImpl(settings, state, frame);
}
}
}

View File

@ -18,6 +18,7 @@ private:
FILL_FROM,
FILL_TO,
FILL_STEP,
FILL_STALENESS,
};
public:
@ -32,12 +33,14 @@ public:
void setFillFrom(ASTPtr node) { setChild(Child::FILL_FROM, node); }
void setFillTo(ASTPtr node) { setChild(Child::FILL_TO, node); }
void setFillStep(ASTPtr node) { setChild(Child::FILL_STEP, node); }
void setFillStaleness(ASTPtr node) { setChild(Child::FILL_STALENESS, node); }
/** Collation for locale-specific string comparison. If empty, then sorting done by bytes. */
ASTPtr getCollation() const { return getChild(Child::COLLATION); }
ASTPtr getFillFrom() const { return getChild(Child::FILL_FROM); }
ASTPtr getFillTo() const { return getChild(Child::FILL_TO); }
ASTPtr getFillStep() const { return getChild(Child::FILL_STEP); }
ASTPtr getFillStaleness() const { return getChild(Child::FILL_STALENESS); }
String getID(char) const override { return "OrderByElement"; }

View File

@ -546,6 +546,7 @@ namespace DB
MR_MACROS(YY, "YY") \
MR_MACROS(YYYY, "YYYY") \
MR_MACROS(ZKPATH, "ZKPATH") \
MR_MACROS(STALENESS, "STALENESS") \
/// The list of keywords where underscore is intentional
#define APPLY_FOR_PARSER_KEYWORDS_WITH_UNDERSCORES(MR_MACROS) \

View File

@ -31,7 +31,7 @@ CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_r
/// If we generate random UUIDs for already existing tables then those UUIDs will not be correct, making those inner target tables inaccessible.
/// Thus it's not safe for example to replace
/// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with
/// "ATTACH MATERIALIZED VIEW mv TO INNER UUID '123e4567-e89b-12d3-a456-426614174000' AS SELECT a FROM b"
/// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b"
/// This replacement is safe only for CREATE queries when inner target tables don't exist yet.
if (!query.attach)
{

View File

@ -2178,6 +2178,7 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
ParserKeyword from(Keyword::FROM);
ParserKeyword to(Keyword::TO);
ParserKeyword step(Keyword::STEP);
ParserKeyword staleness(Keyword::STALENESS);
ParserStringLiteral collate_locale_parser;
ParserExpressionWithOptionalAlias exp_parser(false);
@ -2219,6 +2220,7 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
ASTPtr fill_from;
ASTPtr fill_to;
ASTPtr fill_step;
ASTPtr fill_staleness;
if (with_fill.ignore(pos, expected))
{
has_with_fill = true;
@ -2230,6 +2232,9 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
if (step.ignore(pos, expected) && !exp_parser.parse(pos, fill_step, expected))
return false;
if (staleness.ignore(pos, expected) && !exp_parser.parse(pos, fill_staleness, expected))
return false;
}
auto elem = std::make_shared<ASTOrderByElement>();
@ -2244,6 +2249,7 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
elem->setFillFrom(fill_from);
elem->setFillTo(fill_to);
elem->setFillStep(fill_step);
elem->setFillStaleness(fill_staleness);
node = elem;
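
STALENESS is wired in as one more optional keyword-plus-expression pair, parsed and attached like FROM/TO/STEP. A toy sketch of that parse-or-rollback pattern (a hypothetical simplified parser, not ClickHouse's Parser framework):

#include <iostream>
#include <optional>
#include <sstream>
#include <string>

std::optional<long long> parseOptionalStaleness(std::istringstream & in)
{
    std::streampos saved = in.tellg();
    std::string kw;
    long long value;
    if (in >> kw && kw == "STALENESS" && in >> value)
        return value;    // clause present: keyword and expression consumed
    in.clear();
    in.seekg(saved);     // clause absent: rewind, like the parser's pos rollback
    return std::nullopt;
}

int main()
{
    std::istringstream q("STALENESS 3");
    if (auto v = parseOptionalStaleness(q))
        std::cout << "fill_staleness = " << *v << '\n';
}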

View File

@ -2,7 +2,6 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/JoinNode.h>
#include <Planner/PlannerContext.h>

View File

@ -391,6 +391,9 @@ public:
if (sort_node.hasFillStep())
buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep());
if (sort_node.hasFillStaleness())
buffer << " STALENESS " << calculateActionNodeName(sort_node.getFillStaleness());
}
if (i + 1 != order_by_nodes_size)

View File

@ -104,7 +104,6 @@ namespace Setting
extern const SettingsBool optimize_move_to_prewhere;
extern const SettingsBool optimize_move_to_prewhere_if_final;
extern const SettingsBool use_concurrency_control;
extern const SettingsJoinInnerTableSelectionMode query_plan_join_inner_table_selection;
}
namespace ErrorCodes
@ -1268,55 +1267,6 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
plan_to_add_cast.addStep(std::move(cast_join_columns_step));
}
std::optional<ActionsDAG> createStepToDropColumns(
const Block & header,
const ColumnIdentifierSet & outer_scope_columns,
const PlannerContextPtr & planner_context)
{
ActionsDAG drop_unused_columns_after_join_actions_dag(header.getColumnsWithTypeAndName());
ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs;
std::unordered_set<std::string_view> drop_unused_columns_after_join_actions_dag_updated_outputs_names;
std::optional<size_t> first_skipped_column_node_index;
auto & drop_unused_columns_after_join_actions_dag_outputs = drop_unused_columns_after_join_actions_dag.getOutputs();
size_t drop_unused_columns_after_join_actions_dag_outputs_size = drop_unused_columns_after_join_actions_dag_outputs.size();
const auto & global_planner_context = planner_context->getGlobalPlannerContext();
for (size_t i = 0; i < drop_unused_columns_after_join_actions_dag_outputs_size; ++i)
{
const auto & output = drop_unused_columns_after_join_actions_dag_outputs[i];
if (drop_unused_columns_after_join_actions_dag_updated_outputs_names.contains(output->result_name)
|| !global_planner_context->hasColumnIdentifier(output->result_name))
continue;
if (!outer_scope_columns.contains(output->result_name))
{
if (!first_skipped_column_node_index)
first_skipped_column_node_index = i;
continue;
}
drop_unused_columns_after_join_actions_dag_updated_outputs.push_back(output);
drop_unused_columns_after_join_actions_dag_updated_outputs_names.insert(output->result_name);
}
if (!first_skipped_column_node_index)
return {};
/** It is expected that JOIN TREE query plan will contain at least 1 column, even if there are no columns in outer scope.
*
* Example: SELECT count() FROM test_table_1 AS t1, test_table_2 AS t2;
*/
if (drop_unused_columns_after_join_actions_dag_updated_outputs.empty() && first_skipped_column_node_index)
drop_unused_columns_after_join_actions_dag_updated_outputs.push_back(drop_unused_columns_after_join_actions_dag_outputs[*first_skipped_column_node_index]);
drop_unused_columns_after_join_actions_dag_outputs = std::move(drop_unused_columns_after_join_actions_dag_updated_outputs);
return drop_unused_columns_after_join_actions_dag;
}
JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression,
JoinTreeQueryPlan left_join_tree_query_plan,
JoinTreeQueryPlan right_join_tree_query_plan,
@ -1589,37 +1539,21 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
}
const Block & left_header = left_plan.getCurrentHeader();
auto left_table_names = left_header.getNames();
NameSet left_table_names_set(left_table_names.begin(), left_table_names.end());
auto columns_from_joined_table = right_plan.getCurrentHeader().getNamesAndTypesList();
table_join->setColumnsFromJoinedTable(columns_from_joined_table, left_table_names_set, "");
for (auto & column_from_joined_table : columns_from_joined_table)
{
/// Add columns from joined table only if they are present in outer scope, otherwise they can be dropped
if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(column_from_joined_table.name) &&
outer_scope_columns.contains(column_from_joined_table.name))
table_join->addJoinedColumn(column_from_joined_table);
}
const Block & right_header = right_plan.getCurrentHeader();
auto columns_from_left_table = left_header.getNamesAndTypesList();
auto columns_from_right_table = right_header.getNamesAndTypesList();
table_join->setInputColumns(columns_from_left_table, columns_from_right_table);
for (auto & column_from_joined_table : columns_from_left_table)
{
/// Add columns to output only if they are present in outer scope, otherwise they can be dropped
if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(column_from_joined_table.name) &&
outer_scope_columns.contains(column_from_joined_table.name))
table_join->setUsedColumn(column_from_joined_table, JoinTableSide::Left);
}
for (auto & column_from_joined_table : columns_from_right_table)
{
/// Add columns to output only if they are present in outer scope, otherwise they can be dropped
if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(column_from_joined_table.name) &&
outer_scope_columns.contains(column_from_joined_table.name))
table_join->setUsedColumn(column_from_joined_table, JoinTableSide::Right);
}
if (table_join->getOutputColumns(JoinTableSide::Left).empty() && table_join->getOutputColumns(JoinTableSide::Right).empty())
{
if (!columns_from_left_table.empty())
table_join->setUsedColumn(columns_from_left_table.front(), JoinTableSide::Left);
else if (!columns_from_right_table.empty())
table_join->setUsedColumn(columns_from_right_table.front(), JoinTableSide::Right);
}
auto join_algorithm = chooseJoinAlgorithm(table_join, join_node.getRightTableExpression(), left_header, right_header, planner_context);
auto result_plan = QueryPlan();
@ -1707,26 +1641,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
}
auto join_pipeline_type = join_algorithm->pipelineType();
ColumnIdentifierSet outer_scope_columns_nonempty;
if (outer_scope_columns.empty())
{
if (left_header.columns() > 1)
outer_scope_columns_nonempty.insert(left_header.getByPosition(0).name);
else if (right_header.columns() > 1)
outer_scope_columns_nonempty.insert(right_header.getByPosition(0).name);
}
auto join_step = std::make_unique<JoinStep>(
left_plan.getCurrentHeader(),
right_plan.getCurrentHeader(),
std::move(join_algorithm),
settings[Setting::max_block_size],
settings[Setting::max_threads],
outer_scope_columns.empty() ? outer_scope_columns_nonempty : outer_scope_columns,
false /*optimize_read_in_order*/,
true /*optimize_skip_unused_shards*/);
join_step->inner_table_selection_mode = settings[Setting::query_plan_join_inner_table_selection];
false /*optimize_read_in_order*/);
join_step->setStepDescription(fmt::format("JOIN {}", join_pipeline_type));
@ -1737,18 +1658,47 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
result_plan.unitePlans(std::move(join_step), {std::move(plans)});
}
const auto & header_after_join = result_plan.getCurrentHeader();
if (header_after_join.columns() > outer_scope_columns.size())
ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentHeader().getColumnsWithTypeAndName());
ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs;
std::unordered_set<std::string_view> drop_unused_columns_after_join_actions_dag_updated_outputs_names;
std::optional<size_t> first_skipped_column_node_index;
auto & drop_unused_columns_after_join_actions_dag_outputs = drop_unused_columns_after_join_actions_dag.getOutputs();
size_t drop_unused_columns_after_join_actions_dag_outputs_size = drop_unused_columns_after_join_actions_dag_outputs.size();
for (size_t i = 0; i < drop_unused_columns_after_join_actions_dag_outputs_size; ++i)
{
auto drop_unused_columns_after_join_actions_dag = createStepToDropColumns(header_after_join, outer_scope_columns, planner_context);
if (drop_unused_columns_after_join_actions_dag)
const auto & output = drop_unused_columns_after_join_actions_dag_outputs[i];
const auto & global_planner_context = planner_context->getGlobalPlannerContext();
if (drop_unused_columns_after_join_actions_dag_updated_outputs_names.contains(output->result_name)
|| !global_planner_context->hasColumnIdentifier(output->result_name))
continue;
if (!outer_scope_columns.contains(output->result_name))
{
auto drop_unused_columns_after_join_transform_step = std::make_unique<ExpressionStep>(result_plan.getCurrentHeader(), std::move(*drop_unused_columns_after_join_actions_dag));
drop_unused_columns_after_join_transform_step->setStepDescription("Drop unused columns after JOIN");
result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step));
if (!first_skipped_column_node_index)
first_skipped_column_node_index = i;
continue;
}
drop_unused_columns_after_join_actions_dag_updated_outputs.push_back(output);
drop_unused_columns_after_join_actions_dag_updated_outputs_names.insert(output->result_name);
}
/** It is expected that JOIN TREE query plan will contain at least 1 column, even if there are no columns in outer scope.
*
* Example: SELECT count() FROM test_table_1 AS t1, test_table_2 AS t2;
*/
if (drop_unused_columns_after_join_actions_dag_updated_outputs.empty() && first_skipped_column_node_index)
drop_unused_columns_after_join_actions_dag_updated_outputs.push_back(drop_unused_columns_after_join_actions_dag_outputs[*first_skipped_column_node_index]);
drop_unused_columns_after_join_actions_dag_outputs = std::move(drop_unused_columns_after_join_actions_dag_updated_outputs);
auto drop_unused_columns_after_join_transform_step = std::make_unique<ExpressionStep>(result_plan.getCurrentHeader(), std::move(drop_unused_columns_after_join_actions_dag));
drop_unused_columns_after_join_transform_step->setStepDescription("DROP unused columns after JOIN");
result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step));
for (const auto & right_join_tree_query_plan_row_policy : right_join_tree_query_plan.used_row_policies)
left_join_tree_query_plan.used_row_policies.insert(right_join_tree_query_plan_row_policy);

View File

@ -43,7 +43,7 @@ std::pair<Field, DataTypePtr> extractWithFillValue(const QueryTreeNodePtr & node
return result;
}
std::pair<Field, std::optional<IntervalKind>> extractWithFillStepValue(const QueryTreeNodePtr & node)
std::pair<Field, std::optional<IntervalKind>> extractWithFillValueWithIntervalKind(const QueryTreeNodePtr & node)
{
const auto & constant_node = node->as<ConstantNode &>();
@ -77,7 +77,7 @@ FillColumnDescription extractWithFillDescription(const SortNode & sort_node)
if (sort_node.hasFillStep())
{
auto extract_result = extractWithFillStepValue(sort_node.getFillStep());
auto extract_result = extractWithFillValueWithIntervalKind(sort_node.getFillStep());
fill_column_description.fill_step = std::move(extract_result.first);
fill_column_description.step_kind = std::move(extract_result.second);
}
@ -87,16 +87,36 @@ FillColumnDescription extractWithFillDescription(const SortNode & sort_node)
fill_column_description.fill_step = Field(direction_value);
}
if (sort_node.getFillStaleness())
{
auto extract_result = extractWithFillValueWithIntervalKind(sort_node.getFillStaleness());
fill_column_description.fill_staleness = std::move(extract_result.first);
fill_column_description.staleness_kind = std::move(extract_result.second);
}
if (applyVisitor(FieldVisitorAccurateEquals(), fill_column_description.fill_step, Field{0}))
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"WITH FILL STEP value cannot be zero");
if (sort_node.hasFillStaleness())
{
if (sort_node.hasFillFrom())
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"WITH FILL STALENESS cannot be used together with WITH FILL FROM");
}
if (sort_node.getSortDirection() == SortDirection::ASCENDING)
{
if (applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_step, Field{0}))
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"WITH FILL STEP value cannot be negative for sorting in ascending direction");
if (applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_staleness, Field{0}))
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"WITH FILL STALENESS value cannot be negative for sorting in ascending direction");
if (!fill_column_description.fill_from.isNull() && !fill_column_description.fill_to.isNull() &&
applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_to, fill_column_description.fill_from))
{
@ -110,6 +130,10 @@ FillColumnDescription extractWithFillDescription(const SortNode & sort_node)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"WITH FILL STEP value cannot be positive for sorting in descending direction");
if (applyVisitor(FieldVisitorAccurateLess(), Field{0}, fill_column_description.fill_staleness))
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"WITH FILL STALENESS value cannot be positive for sorting in descending direction");
if (!fill_column_description.fill_from.isNull() && !fill_column_description.fill_to.isNull() &&
applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_from, fill_column_description.fill_to))
{

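The STALENESS sign checks mirror the existing STEP validation. Restated as a standalone helper (simplified to integers; the real code compares Fields via FieldVisitorAccurateLess):

#include <iostream>
#include <stdexcept>

enum class SortDirection { Ascending, Descending };

void validateStaleness(long long staleness, SortDirection dir)
{
    if (dir == SortDirection::Ascending && staleness < 0)
        throw std::invalid_argument("STALENESS cannot be negative for ascending order");
    if (dir == SortDirection::Descending && staleness > 0)
        throw std::invalid_argument("STALENESS cannot be positive for descending order");
}

int main()
{
    validateStaleness(3, SortDirection::Ascending);    // ok
    try { validateStaleness(3, SortDirection::Descending); }
    catch (const std::invalid_argument & e) { std::cout << e.what() << '\n'; }
}
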
View File

@ -24,7 +24,12 @@ void IMergingAlgorithmWithDelayedChunk::initializeQueue(Inputs inputs)
continue;
cursors[source_num] = SortCursorImpl(
header, current_inputs[source_num].chunk.getColumns(), description, source_num, current_inputs[source_num].permutation);
header,
current_inputs[source_num].chunk.getColumns(),
current_inputs[source_num].chunk.getNumRows(),
description,
source_num,
current_inputs[source_num].permutation);
inputs_origin_merge_tree_part_level[source_num] = getPartLevelFromChunk(current_inputs[source_num].chunk);
}
@ -41,7 +46,7 @@ void IMergingAlgorithmWithDelayedChunk::updateCursor(Input & input, size_t sourc
last_chunk_sort_columns = std::move(cursors[source_num].sort_columns);
current_input.swap(input);
cursors[source_num].reset(current_input.chunk.getColumns(), header, current_input.permutation);
cursors[source_num].reset(current_input.chunk.getColumns(), header, current_input.chunk.getNumRows(), current_input.permutation);
inputs_origin_merge_tree_part_level[source_num] = getPartLevelFromChunk(current_input.chunk);
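
The cursors now receive the chunk's row count explicitly instead of deriving it from the columns. A minimal sketch of the idea (the motivation stated here is an assumption: an explicit count stays well-defined even for chunks whose columns alone cannot tell it):

#include <cassert>
#include <cstddef>
#include <vector>

struct ChunkModel
{
    std::vector<std::vector<int>> columns;
    size_t num_rows = 0;    // carried alongside the columns
};

size_t rowsOf(const ChunkModel & c)
{
    return c.num_rows;      // well-defined even when columns is empty
}

int main()
{
    ChunkModel c{{}, 3};
    assert(rowsOf(c) == 3);
}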

View File

@ -31,7 +31,8 @@ void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs)
source.skip_last_row = inputs[source_num].skip_last_row;
source.chunk = chunk_allocator.alloc(inputs[source_num].chunk);
cursors[source_num] = SortCursorImpl(header, source.chunk->getColumns(), description, source_num, inputs[source_num].permutation);
cursors[source_num] = SortCursorImpl(
header, source.chunk->getColumns(), source.chunk->getNumRows(), description, source_num, inputs[source_num].permutation);
source.chunk->all_columns = cursors[source_num].all_columns;
source.chunk->sort_columns = cursors[source_num].sort_columns;
@ -49,7 +50,7 @@ void IMergingAlgorithmWithSharedChunks::consume(Input & input, size_t source_num
auto & source = sources[source_num];
source.skip_last_row = input.skip_last_row;
source.chunk = chunk_allocator.alloc(input.chunk);
cursors[source_num].reset(source.chunk->getColumns(), header, input.permutation);
cursors[source_num].reset(source.chunk->getColumns(), header, source.chunk->getNumRows(), input.permutation);
source.chunk->all_columns = cursors[source_num].all_columns;
source.chunk->sort_columns = cursors[source_num].sort_columns;

View File

@ -59,7 +59,7 @@ void MergingSortedAlgorithm::initialize(Inputs inputs)
if (!chunk)
continue;
cursors[source_num] = SortCursorImpl(header, chunk.getColumns(), description, source_num);
cursors[source_num] = SortCursorImpl(header, chunk.getColumns(), chunk.getNumRows(), description, source_num);
}
if (sorting_queue_strategy == SortingQueueStrategy::Default)
@ -84,7 +84,7 @@ void MergingSortedAlgorithm::consume(Input & input, size_t source_num)
{
removeConstAndSparse(input);
current_inputs[source_num].swap(input);
cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), header);
cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), header, current_inputs[source_num].chunk.getNumRows());
if (sorting_queue_strategy == SortingQueueStrategy::Default)
{

View File

@ -6,7 +6,6 @@
#include <IO/Operators.h>
#include <Common/JSONBuilder.h>
#include <Common/typeid_cast.h>
#include <Processors/Transforms/ColumnPermuteTransform.h>
namespace DB
{
@ -37,37 +36,6 @@ std::vector<std::pair<String, String>> describeJoinActions(const JoinPtr & join)
return description;
}
std::vector<size_t> getPermutationForBlock(
const Block & block,
const Block & lhs_block,
const Block & rhs_block,
const NameSet & name_filter)
{
std::vector<size_t> permutation;
permutation.reserve(block.columns());
Block::NameMap name_map = block.getNamesToIndexesMap();
bool is_trivial = true;
for (const auto & other_block : {lhs_block, rhs_block})
{
for (const auto & col : other_block)
{
if (!name_filter.contains(col.name))
continue;
if (auto it = name_map.find(col.name); it != name_map.end())
{
is_trivial = is_trivial && it->second == permutation.size();
permutation.push_back(it->second);
}
}
}
if (is_trivial && permutation.size() == block.columns())
return {};
return permutation;
}
}
JoinStep::JoinStep(
@ -76,15 +44,8 @@ JoinStep::JoinStep(
JoinPtr join_,
size_t max_block_size_,
size_t max_streams_,
NameSet required_output_,
bool keep_left_read_in_order_,
bool use_new_analyzer_)
: join(std::move(join_))
, max_block_size(max_block_size_)
, max_streams(max_streams_)
, required_output(std::move(required_output_))
, keep_left_read_in_order(keep_left_read_in_order_)
, use_new_analyzer(use_new_analyzer_)
bool keep_left_read_in_order_)
: join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
{
updateInputHeaders({left_header_, right_header_});
}
@ -94,43 +55,23 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
if (pipelines.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps");
Block lhs_header = pipelines[0]->getHeader();
Block rhs_header = pipelines[1]->getHeader();
if (swap_streams)
std::swap(pipelines[0], pipelines[1]);
if (join->pipelineType() == JoinPipelineType::YShaped)
{
auto joined_pipeline = QueryPipelineBuilder::joinPipelinesYShaped(
std::move(pipelines[0]), std::move(pipelines[1]), join, join_algorithm_header, max_block_size, &processors);
std::move(pipelines[0]), std::move(pipelines[1]), join, *output_header, max_block_size, &processors);
joined_pipeline->resize(max_streams);
return joined_pipeline;
}
auto pipeline = QueryPipelineBuilder::joinPipelinesRightLeft(
return QueryPipelineBuilder::joinPipelinesRightLeft(
std::move(pipelines[0]),
std::move(pipelines[1]),
join,
join_algorithm_header,
*output_header,
max_block_size,
max_streams,
keep_left_read_in_order,
&processors);
if (!use_new_analyzer)
return pipeline;
auto column_permutation = getPermutationForBlock(pipeline->getHeader(), lhs_header, rhs_header, required_output);
if (!column_permutation.empty())
{
pipeline->addSimpleTransform([&column_permutation](const Block & header)
{
return std::make_shared<ColumnPermuteTransform>(header, column_permutation);
});
}
return pipeline;
}
bool JoinStep::allowPushDownToRight() const
@ -149,49 +90,17 @@ void JoinStep::describeActions(FormatSettings & settings) const
for (const auto & [name, value] : describeJoinActions(join))
settings.out << prefix << name << ": " << value << '\n';
if (swap_streams)
settings.out << prefix << "Swapped: true\n";
}
void JoinStep::describeActions(JSONBuilder::JSONMap & map) const
{
for (const auto & [name, value] : describeJoinActions(join))
map.add(name, value);
if (swap_streams)
map.add("Swapped", true);
}
void JoinStep::setJoin(JoinPtr join_, bool swap_streams_)
{
join_algorithm_header.clear();
swap_streams = swap_streams_;
join = std::move(join_);
updateOutputHeader();
}
void JoinStep::updateOutputHeader()
{
if (join_algorithm_header)
return;
const auto & header = swap_streams ? input_headers[1] : input_headers[0];
Block result_header = JoiningTransform::transformHeader(header, join);
join_algorithm_header = result_header;
if (!use_new_analyzer)
{
if (swap_streams)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot swap streams without new analyzer");
output_header = result_header;
return;
}
auto column_permutation = getPermutationForBlock(result_header, input_headers[0], input_headers[1], required_output);
if (!column_permutation.empty())
result_header = ColumnPermuteTransform::permute(result_header, column_permutation);
output_header = result_header;
output_header = JoiningTransform::transformHeader(input_headers.front(), join);
}
static ITransformingStep::Traits getStorageJoinTraits()

View File

@ -2,7 +2,6 @@
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Core/Joins.h>
namespace DB
{
@ -20,9 +19,7 @@ public:
JoinPtr join_,
size_t max_block_size_,
size_t max_streams_,
NameSet required_output_,
bool keep_left_read_in_order_,
bool use_new_analyzer_);
bool keep_left_read_in_order_);
String getName() const override { return "Join"; }
@ -34,26 +31,16 @@ public:
void describeActions(FormatSettings & settings) const override;
const JoinPtr & getJoin() const { return join; }
void setJoin(JoinPtr join_, bool swap_streams_ = false);
void setJoin(JoinPtr join_) { join = std::move(join_); }
bool allowPushDownToRight() const;
JoinInnerTableSelectionMode inner_table_selection_mode = JoinInnerTableSelectionMode::Right;
private:
void updateOutputHeader() override;
/// Header that expected to be returned from IJoin
Block join_algorithm_header;
JoinPtr join;
size_t max_block_size;
size_t max_streams;
const NameSet required_output;
std::set<size_t> columns_to_remove;
bool keep_left_read_in_order;
bool use_new_analyzer = false;
bool swap_streams = false;
};
/// Special step for the case when Join is already filled.

View File

@ -113,7 +113,6 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack);
void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes);
void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &);
void optimizeJoin(QueryPlan::Node & node, QueryPlan::Nodes &);
void optimizeDistinctInOrder(QueryPlan::Node & node, QueryPlan::Nodes &);
/// A separate tree traverse to apply sorting properties after *InOrder optimizations.

View File

@ -1,101 +0,0 @@
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/QueryPlan/JoinStep.h>
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/Optimizations/actionsDAGUtils.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Storages/StorageMemory.h>
#include <Processors/QueryPlan/ReadFromMemoryStorageStep.h>
#include <Core/Settings.h>
#include <Interpreters/IJoin.h>
#include <Interpreters/HashJoin/HashJoin.h>
#include <Interpreters/TableJoin.h>
#include <Common/logger_useful.h>
#include <Core/Joins.h>
namespace DB::QueryPlanOptimizations
{
static std::optional<UInt64> estimateReadRowsCount(QueryPlan::Node & node)
{
IQueryPlanStep * step = node.step.get();
if (const auto * reading = typeid_cast<const ReadFromMergeTree *>(step))
{
if (auto analyzed_result = reading->getAnalyzedResult())
return analyzed_result->selected_rows;
if (auto analyzed_result = reading->selectRangesToRead())
return analyzed_result->selected_rows;
return {};
}
if (const auto * reading = typeid_cast<const ReadFromMemoryStorageStep *>(step))
return reading->getStorage()->totalRows(Settings{});
if (node.children.size() != 1)
return {};
if (typeid_cast<ExpressionStep *>(step) || typeid_cast<FilterStep *>(step))
return estimateReadRowsCount(*node.children.front());
return {};
}
void optimizeJoin(QueryPlan::Node & node, QueryPlan::Nodes &)
{
auto * join_step = typeid_cast<JoinStep *>(node.step.get());
if (!join_step || node.children.size() != 2)
return;
const auto & join = join_step->getJoin();
if (join->pipelineType() != JoinPipelineType::FillRightFirst || !join->isCloneSupported())
return;
const auto & table_join = join->getTableJoin();
/// Algorithms other than HashJoin may not support OUTER JOINs
if (table_join.kind() != JoinKind::Inner && !typeid_cast<const HashJoin *>(join.get()))
return;
/// fixme: USING clause handled specially in join algorithm, so swap breaks it
/// fixme: Swapping for SEMI and ANTI joins should be alright, need to try to enable it and test
if (table_join.hasUsing() || table_join.strictness() != JoinStrictness::All)
return;
bool need_swap = false;
if (join_step->inner_table_selection_mode == JoinInnerTableSelectionMode::Auto)
{
auto lhs_extimation = estimateReadRowsCount(*node.children[0]);
auto rhs_extimation = estimateReadRowsCount(*node.children[1]);
LOG_TRACE(getLogger("optimizeJoin"), "Left table estimation: {}, right table estimation: {}",
lhs_extimation.transform(toString<UInt64>).value_or("unknown"),
rhs_extimation.transform(toString<UInt64>).value_or("unknown"));
if (lhs_extimation && rhs_extimation && *lhs_extimation < *rhs_extimation)
need_swap = true;
}
else if (join_step->inner_table_selection_mode == JoinInnerTableSelectionMode::Left)
{
need_swap = true;
}
if (!need_swap)
return;
const auto & headers = join_step->getInputHeaders();
if (headers.size() != 2)
return;
const auto & left_stream_input_header = headers.front();
const auto & right_stream_input_header = headers.back();
auto updated_table_join = std::make_shared<TableJoin>(table_join);
updated_table_join->swapSides();
auto updated_join = join->clone(updated_table_join, right_stream_input_header, left_stream_input_header);
join_step->setJoin(std::move(updated_join), /* swap_streams= */ true);
}
}

View File

@ -227,9 +227,6 @@ void addStepsToBuildSets(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::No
/// NOTE: frame cannot be safely used after stack was modified.
auto & frame = stack.back();
if (frame.next_child == 0)
optimizeJoin(*frame.node, nodes);
/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{

View File

@ -35,8 +35,6 @@ public:
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
const StoragePtr & getStorage() const { return storage; }
private:
static constexpr auto name = "ReadFromMemoryStorage";

View File

@ -1,49 +0,0 @@
#include <Processors/Transforms/ColumnPermuteTransform.h>
namespace DB
{
namespace
{
template <typename T>
void applyPermutation(std::vector<T> & data, const std::vector<size_t> & permutation)
{
std::vector<T> res;
res.reserve(permutation.size());
for (size_t i : permutation)
res.push_back(data[i]);
data = std::move(res);
}
void permuteChunk(Chunk & chunk, const std::vector<size_t> & permutation)
{
size_t num_rows = chunk.getNumRows();
auto columns = chunk.detachColumns();
applyPermutation(columns, permutation);
chunk.setColumns(std::move(columns), num_rows);
}
}
Block ColumnPermuteTransform::permute(const Block & block, const std::vector<size_t> & permutation)
{
auto columns = block.getColumnsWithTypeAndName();
applyPermutation(columns, permutation);
return Block(columns);
}
ColumnPermuteTransform::ColumnPermuteTransform(const Block & header_, const std::vector<size_t> & permutation_)
: ISimpleTransform(header_, permute(header_, permutation_), false)
, permutation(permutation_)
{
}
void ColumnPermuteTransform::transform(Chunk & chunk)
{
permuteChunk(chunk, permutation);
}
}

View File

@ -1,30 +0,0 @@
#pragma once
#include <atomic>
#include <mutex>
#include <vector>
#include <Processors/ISimpleTransform.h>
#include <Poco/Logger.h>
#include <Interpreters/Set.h>
namespace DB
{
class ColumnPermuteTransform : public ISimpleTransform
{
public:
ColumnPermuteTransform(const Block & header_, const std::vector<size_t> & permutation_);
String getName() const override { return "ColumnPermuteTransform"; }
void transform(Chunk & chunk) override;
static Block permute(const Block & block, const std::vector<size_t> & permutation);
private:
Names column_names;
std::vector<size_t> permutation;
};
}

View File

@ -7,18 +7,20 @@
#include <Core/Types.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
#include <Common/FieldVisitorScale.h>
#include <Common/FieldVisitorSum.h>
#include <Common/FieldVisitorToString.h>
#include <Common/logger_useful.h>
#include <IO/Operators.h>
namespace DB
{
constexpr bool debug_logging_enabled = false;
constexpr static bool debug_logging_enabled = false;
template <typename T>
void logDebug(String key, const T & value, const char * separator = " : ")
inline static void logDebug(const char * key, const T & value, const char * separator = " : ")
{
if constexpr (debug_logging_enabled)
{
@ -60,15 +62,74 @@ static FillColumnDescription::StepFunction getStepFunction(
{
#define DECLARE_CASE(NAME) \
case IntervalKind::Kind::NAME: \
return [step, scale, &date_lut](Field & field) { \
return [step, scale, &date_lut](Field & field, Int32 jumps_count) { \
field = Add##NAME##sImpl::execute(static_cast<T>(\
field.safeGet<T>()), static_cast<Int32>(step), date_lut, utc_time_zone, scale); };
field.safeGet<T>()), static_cast<Int32>(step) * jumps_count, date_lut, utc_time_zone, scale); };
FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
#undef DECLARE_CASE
}
}
static FillColumnDescription::StepFunction getStepFunction(const Field & step, const std::optional<IntervalKind> & step_kind, const DataTypePtr & type)
{
WhichDataType which(type);
if (step_kind)
{
if (which.isDate() || which.isDate32())
{
Int64 avg_seconds = step.safeGet<Int64>() * step_kind->toAvgSeconds();
if (std::abs(avg_seconds) < 86400)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Value of step is to low ({} seconds). Must be >= 1 day", std::abs(avg_seconds));
}
if (which.isDate())
return getStepFunction<UInt16>(step_kind.value(), step.safeGet<Int64>(), DateLUT::instance());
else if (which.isDate32())
return getStepFunction<Int32>(step_kind.value(), step.safeGet<Int64>(), DateLUT::instance());
else if (const auto * date_time = checkAndGetDataType<DataTypeDateTime>(type.get()))
return getStepFunction<UInt32>(step_kind.value(), step.safeGet<Int64>(), date_time->getTimeZone());
else if (const auto * date_time64 = checkAndGetDataType<DataTypeDateTime64>(type.get()))
{
const auto & step_dec = step.safeGet<const DecimalField<Decimal64> &>();
Int64 converted_step = DecimalUtils::convertTo<Int64>(step_dec.getValue(), step_dec.getScale());
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
switch (step_kind.value()) // NOLINT(bugprone-switch-missing-default-case)
{
#define DECLARE_CASE(NAME) \
case IntervalKind::Kind::NAME: \
return [converted_step, &time_zone = date_time64->getTimeZone()](Field & field, Int32 jumps_count) \
{ \
auto field_decimal = field.safeGet<DecimalField<DateTime64>>(); \
auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), converted_step * jumps_count, time_zone, utc_time_zone, field_decimal.getScale()); \
field = DecimalField(res, field_decimal.getScale()); \
}; \
break;
FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
#undef DECLARE_CASE
}
}
else
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName());
}
else
{
return [step](Field & field, Int32 jumps_count)
{
auto shifted_step = step;
if (jumps_count != 1)
applyVisitor(FieldVisitorScale(jumps_count), shifted_step);
applyVisitor(FieldVisitorSum(shifted_step), field);
};
}
}
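
Step functions now take a jumps_count so a single call can advance several steps at once (presumably what shift() relies on when crossing a staleness interval). A plain-integer sketch of that contract (a hypothetical simplification of the Field-based StepFunction):

#include <cassert>
#include <functional>

using StepFunction = std::function<void(long long & value, int jumps_count)>;

StepFunction makeStep(long long step)
{
    // Advancing by jumps_count steps at once is just scaling the step first,
    // mirroring FieldVisitorScale in the non-interval branch above.
    return [step](long long & value, int jumps_count) { value += step * jumps_count; };
}

int main()
{
    auto advance = makeStep(5);
    long long v = 0;
    advance(v, 1); assert(v == 5);    // single step: the pre-existing behaviour
    advance(v, 3); assert(v == 20);   // one call covers three steps
}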
static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type)
{
auto max_type = Field::Types::Null;
@ -125,7 +186,8 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
if (descr.fill_from.getType() > max_type
|| descr.fill_to.getType() > max_type
|| descr.fill_step.getType() > max_type)
|| descr.fill_step.getType() > max_type
|| descr.fill_staleness.getType() > max_type)
return false;
if (!descr.fill_from.isNull())
@ -134,56 +196,11 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
descr.fill_to = convertFieldToTypeOrThrow(descr.fill_to, *to_type);
if (!descr.fill_step.isNull())
descr.fill_step = convertFieldToTypeOrThrow(descr.fill_step, *to_type);
if (!descr.fill_staleness.isNull())
descr.fill_staleness = convertFieldToTypeOrThrow(descr.fill_staleness, *to_type);
if (descr.step_kind)
{
if (which.isDate() || which.isDate32())
{
Int64 avg_seconds = descr.fill_step.safeGet<Int64>() * descr.step_kind->toAvgSeconds();
if (std::abs(avg_seconds) < 86400)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Value of step is to low ({} seconds). Must be >= 1 day", std::abs(avg_seconds));
}
if (which.isDate())
descr.step_func = getStepFunction<UInt16>(*descr.step_kind, descr.fill_step.safeGet<Int64>(), DateLUT::instance());
else if (which.isDate32())
descr.step_func = getStepFunction<Int32>(*descr.step_kind, descr.fill_step.safeGet<Int64>(), DateLUT::instance());
else if (const auto * date_time = checkAndGetDataType<DataTypeDateTime>(type.get()))
descr.step_func = getStepFunction<UInt32>(*descr.step_kind, descr.fill_step.safeGet<Int64>(), date_time->getTimeZone());
else if (const auto * date_time64 = checkAndGetDataType<DataTypeDateTime64>(type.get()))
{
const auto & step_dec = descr.fill_step.safeGet<const DecimalField<Decimal64> &>();
Int64 step = DecimalUtils::convertTo<Int64>(step_dec.getValue(), step_dec.getScale());
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
switch (*descr.step_kind) // NOLINT(bugprone-switch-missing-default-case)
{
#define DECLARE_CASE(NAME) \
case IntervalKind::Kind::NAME: \
descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \
{ \
auto field_decimal = field.safeGet<DecimalField<DateTime64>>(); \
auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, utc_time_zone, field_decimal.getScale()); \
field = DecimalField(res, field_decimal.getScale()); \
}; \
break;
FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
#undef DECLARE_CASE
}
}
else
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName());
}
else
{
descr.step_func = [step = descr.fill_step](Field & field)
{
applyVisitor(FieldVisitorSum(step), field);
};
}
descr.step_func = getStepFunction(descr.fill_step, descr.step_kind, type);
descr.staleness_step_func = getStepFunction(descr.fill_staleness, descr.staleness_kind, type);
return true;
}
@ -218,6 +235,7 @@ FillingTransform::FillingTransform(
fill_column_positions.push_back(block_position);
auto & descr = filling_row.getFillDescription(i);
running_with_staleness |= !descr.fill_staleness.isNull();
const Block & output_header = getOutputPort().getHeader();
const DataTypePtr & type = removeNullable(output_header.getByPosition(block_position).type);
@ -437,7 +455,7 @@ void FillingTransform::initColumns(
non_const_columns.reserve(input_columns.size());
for (const auto & column : input_columns)
non_const_columns.push_back(column->convertToFullColumnIfConst());
non_const_columns.push_back(column->convertToFullColumnIfConst()->convertToFullColumnIfSparse());
for (const auto & column : non_const_columns)
output_columns.push_back(column->cloneEmpty()->assumeMutable());
@ -482,26 +500,26 @@ bool FillingTransform::generateSuffixIfNeeded(
MutableColumnRawPtrs res_sort_prefix_columns,
MutableColumnRawPtrs res_other_columns)
{
logDebug("generateSuffixIfNeeded() filling_row", filling_row);
logDebug("generateSuffixIfNeeded() next_row", next_row);
logDebug("generateSuffixIfNeeded filling_row", filling_row);
logDebug("generateSuffixIfNeeded next_row", next_row);
/// Determines if we should insert filling row before start generating next rows
bool should_insert_first = (next_row < filling_row && !filling_row_inserted) || next_row.isNull();
bool should_insert_first = (next_row < filling_row && !filling_row_inserted) || (next_row.isNull() && !filling_row.isNull());
logDebug("should_insert_first", should_insert_first);
for (size_t i = 0, size = filling_row.size(); i < size; ++i)
next_row[i] = filling_row.getFillDescription(i).fill_to;
next_row[i] = Field{};
logDebug("generateSuffixIfNeeded() next_row updated", next_row);
logDebug("generateSuffixIfNeeded next_row updated", next_row);
if (filling_row >= next_row)
if (!filling_row.hasSomeConstraints() || !filling_row.isConstraintsSatisfied())
{
logDebug("generateSuffixIfNeeded()", "no need to generate suffix");
logDebug("generateSuffixIfNeeded", "will not generate suffix");
return false;
}
Block interpolate_block;
if (should_insert_first && filling_row < next_row)
if (should_insert_first)
{
interpolate(result_columns, interpolate_block);
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block);
@ -516,9 +534,7 @@ bool FillingTransform::generateSuffixIfNeeded(
bool filling_row_changed = false;
while (true)
{
const auto [apply, changed] = filling_row.next(next_row);
filling_row_changed = changed;
if (!apply)
if (!filling_row.next(next_row, filling_row_changed))
break;
interpolate(result_columns, interpolate_block);
@ -595,7 +611,7 @@ void FillingTransform::transformRange(
if (!fill_from.isNull() && !equals(current_value, fill_from))
{
filling_row.initFromDefaults(i);
filling_row.initUsingFrom(i);
filling_row_inserted = false;
if (less(fill_from, current_value, filling_row.getDirection(i)))
{
@ -609,6 +625,9 @@ void FillingTransform::transformRange(
}
}
/// Initialize the first staleness interval
filling_row.updateConstraintsWithStalenessRow(input_fill_columns, range_begin);
for (size_t row_ind = range_begin; row_ind < range_end; ++row_ind)
{
logDebug("row", row_ind);
@ -619,21 +638,14 @@ void FillingTransform::transformRange(
logDebug("should_insert_first", should_insert_first);
for (size_t i = 0, size = filling_row.size(); i < size; ++i)
{
const auto current_value = (*input_fill_columns[i])[row_ind];
const auto & fill_to = filling_row.getFillDescription(i).fill_to;
next_row[i] = (*input_fill_columns[i])[row_ind];
if (fill_to.isNull() || less(current_value, fill_to, filling_row.getDirection(i)))
next_row[i] = current_value;
else
next_row[i] = fill_to;
}
logDebug("next_row updated", next_row);
/// The condition is true when filling row is initialized by value(s) in FILL FROM,
/// and there are row(s) in current range with value(s) < then in the filling row.
/// It can happen only once for a range.
if (should_insert_first && filling_row < next_row)
if (should_insert_first && filling_row < next_row && filling_row.isConstraintsSatisfied())
{
interpolate(result_columns, interpolate_block);
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block);
@ -643,15 +655,37 @@ void FillingTransform::transformRange(
bool filling_row_changed = false;
while (true)
{
const auto [apply, changed] = filling_row.next(next_row);
filling_row_changed = changed;
if (!apply)
if (!filling_row.next(next_row, filling_row_changed))
break;
interpolate(result_columns, interpolate_block);
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block);
copyRowFromColumns(res_sort_prefix_columns, input_sort_prefix_columns, row_ind);
filling_row_changed = false;
}
if (running_with_staleness)
{
/// Initialize staleness border for current row to generate its prefix
filling_row.updateConstraintsWithStalenessRow(input_fill_columns, row_ind);
while (filling_row.shift(next_row, filling_row_changed))
{
logDebug("filling_row after shift", filling_row);
do
{
logDebug("inserting prefix filling_row", filling_row);
interpolate(result_columns, interpolate_block);
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block);
copyRowFromColumns(res_sort_prefix_columns, input_sort_prefix_columns, row_ind);
filling_row_changed = false;
} while (filling_row.next(next_row, filling_row_changed));
}
}
/// A new valid filling row was generated but not inserted; it will be used during suffix generation
if (filling_row_changed)
filling_row_inserted = false;
@ -707,7 +741,7 @@ void FillingTransform::transform(Chunk & chunk)
/// if no data was processed, then need to initialize filling_row
if (last_row.empty())
{
filling_row.initFromDefaults();
filling_row.initUsingFrom();
filling_row_inserted = false;
}

View File

@ -84,6 +84,7 @@ private:
SortDescription sort_prefix;
const InterpolateDescriptionPtr interpolate_description; /// Contains INTERPOLATE columns
bool running_with_staleness = false; /// True if STALENESS clause was used.
FillingRow filling_row; /// Current row, which is used to fill gaps.
FillingRow next_row; /// Row to which we need to generate filling rows.
bool filling_row_inserted = false;

View File

@ -19,7 +19,6 @@ Block JoiningTransform::transformHeader(Block header, const JoinPtr & join)
join->initialize(header);
ExtraBlockPtr tmp;
join->joinBlock(header, tmp);
materializeBlockInplace(header);
LOG_TEST(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure());
return header;
}

View File

@ -394,7 +394,7 @@ void FullMergeJoinCursor::setChunk(Chunk && chunk)
convertToFullIfSparse(chunk);
current_chunk = std::move(chunk);
cursor = SortCursorImpl(sample_block, current_chunk.getColumns(), desc);
cursor = SortCursorImpl(sample_block, current_chunk.getColumns(), current_chunk.getNumRows(), desc);
}
bool FullMergeJoinCursor::fullyCompleted() const

View File

@ -42,7 +42,7 @@ MergeSorter::MergeSorter(const Block & header, Chunks chunks_, SortDescription &
/// Convert to full column, because some cursors expect non-constant columns
convertToFullIfConst(chunk);
cursors.emplace_back(header, chunk.getColumns(), description, chunk_index);
cursors.emplace_back(header, chunk.getColumns(), chunk.getNumRows(), description, chunk_index);
has_collation |= cursors.back().has_collation;
nonempty_chunks.emplace_back(std::move(chunk));

View File

@ -345,10 +345,11 @@ void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_
throw Exception(ErrorCodes::INCORRECT_DATA, "Index granularity is too big: more than {} rows per index granule.", std::numeric_limits<UInt32>::max());
if (index_sample_block.columns() > 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column");
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected that index is build over a single column");
const String & index_column_name = index_sample_block.getByPosition(0).name;
const ColumnPtr & index_column = block.getByName(index_column_name).column;
const auto & index_column_name = index_sample_block.getByPosition(0).name;
const auto & index_column = block.getByName(index_column_name).column;
ColumnPtr column_cut = index_column->cut(*pos, rows_read);
const auto * column_array = typeid_cast<const ColumnArray *>(column_cut.get());
@ -382,8 +383,7 @@ void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_
if (index->size() + rows > std::numeric_limits<UInt32>::max())
throw Exception(ErrorCodes::INCORRECT_DATA, "Size of vector similarity index would exceed 4 billion entries");
DataTypePtr data_type = block.getDataTypes()[0];
const auto * data_type_array = typeid_cast<const DataTypeArray *>(data_type.get());
const auto * data_type_array = typeid_cast<const DataTypeArray *>(block.getByName(index_column_name).type.get());
if (!data_type_array)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected data type Array(Float*)");
const TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId();

View File

@ -30,8 +30,8 @@ ColumnsDescription StorageSystemGrants::getColumnsDescription()
{"column", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "Name of a column to which access is granted."},
{"is_partial_revoke", std::make_shared<DataTypeUInt8>(),
"Logical value. It shows whether some privileges have been revoked. Possible values: "
"0 — The row describes a partial revoke, "
"1 — The row describes a grant."
"0 — The row describes a grant, "
"1 — The row describes a partial revoke."
},
{"grant_option", std::make_shared<DataTypeUInt8>(), "Permission is granted WITH GRANT OPTION."},
};

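The corrected semantics are easy to check; the user and database names below are hypothetical:

GRANT SELECT ON db.* TO user1;
REVOKE SELECT ON db.secret FROM user1;
SELECT access_type, database, table, is_partial_revoke FROM system.grants WHERE user_name = 'user1';

The GRANT produces a row with is_partial_revoke = 0, and the partial REVOKE a row with is_partial_revoke = 1, matching the fixed description.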
View File

@ -51,11 +51,11 @@ class CI:
TAG_CONFIGS = {
Tags.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]),
Tags.CI_SET_ARM: LabelConfig(
Tags.CI_SET_AARCH64: LabelConfig(
run_jobs=[
JobNames.STYLE_CHECK,
BuildNames.PACKAGE_AARCH64,
JobNames.INTEGRATION_TEST_ARM,
JobNames.INTEGRATION_TEST_AARCH64,
]
),
Tags.CI_SET_REQUIRED: LabelConfig(
@ -95,16 +95,16 @@ class CI:
static_binary_name="aarch64",
additional_pkgs=True,
),
runner_type=Runners.BUILDER_ARM,
runner_type=Runners.BUILDER_AARCH64,
),
BuildNames.PACKAGE_ARM_ASAN: CommonJobConfigs.BUILD.with_properties(
BuildNames.PACKAGE_AARCH64_ASAN: CommonJobConfigs.BUILD.with_properties(
build_config=BuildConfig(
name=BuildNames.PACKAGE_ARM_ASAN,
name=BuildNames.PACKAGE_AARCH64_ASAN,
compiler="clang-18-aarch64",
sanitizer="address",
package_type="deb",
),
runner_type=Runners.BUILDER_ARM,
runner_type=Runners.BUILDER_AARCH64,
),
BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties(
build_config=BuildConfig(
@ -276,16 +276,16 @@ class CI:
JobNames.INSTALL_TEST_AMD: CommonJobConfigs.INSTALL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE]
),
JobNames.INSTALL_TEST_ARM: CommonJobConfigs.INSTALL_TEST.with_properties(
JobNames.INSTALL_TEST_AARCH64: CommonJobConfigs.INSTALL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
runner_type=Runners.STYLE_CHECKER_ARM,
runner_type=Runners.STYLE_CHECKER_AARCH64,
),
JobNames.STATEFUL_TEST_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_ASAN]
),
JobNames.STATEFUL_TEST_ARM_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_ARM_ASAN],
runner_type=Runners.FUNC_TESTER_ARM,
JobNames.STATEFUL_TEST_AARCH64_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64_ASAN],
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.STATEFUL_TEST_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_TSAN]
@ -307,7 +307,7 @@ class CI:
),
JobNames.STATEFUL_TEST_AARCH64: CommonJobConfigs.STATEFUL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
runner_type=Runners.FUNC_TESTER_ARM,
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: CommonJobConfigs.STATEFUL_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE]
@ -335,10 +335,10 @@ class CI:
JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2
),
JobNames.STATELESS_TEST_ARM_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_ARM_ASAN],
JobNames.STATELESS_TEST_AARCH64_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64_ASAN],
num_batches=2,
runner_type=Runners.FUNC_TESTER_ARM,
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_TSAN], num_batches=4
@ -360,7 +360,7 @@ class CI:
),
JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
runner_type=Runners.FUNC_TESTER_ARM,
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=2
@ -432,10 +432,10 @@ class CI:
num_batches=6,
timeout=9000, # the job timed out with default value (7200)
),
JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties(
JobNames.INTEGRATION_TEST_AARCH64: CommonJobConfigs.INTEGRATION_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
num_batches=6,
runner_type=Runners.FUNC_TESTER_ARM,
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE],
@ -453,10 +453,10 @@ class CI:
required_builds=[BuildNames.PACKAGE_RELEASE],
required_on_release_branch=True,
),
JobNames.COMPATIBILITY_TEST_ARM: CommonJobConfigs.COMPATIBILITY_TEST.with_properties(
JobNames.COMPATIBILITY_TEST_AARCH64: CommonJobConfigs.COMPATIBILITY_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
required_on_release_branch=True,
runner_type=Runners.STYLE_CHECKER_ARM,
runner_type=Runners.STYLE_CHECKER_AARCH64,
),
JobNames.UNIT_TEST: CommonJobConfigs.UNIT_TEST.with_properties(
required_builds=[BuildNames.BINARY_RELEASE],
@ -499,22 +499,22 @@ class CI:
required_builds=[BuildNames.BINARY_RELEASE],
run_by_labels=[Labels.JEPSEN_TEST],
run_command="jepsen_check.py keeper",
runner_type=Runners.STYLE_CHECKER_ARM,
runner_type=Runners.STYLE_CHECKER_AARCH64,
),
JobNames.JEPSEN_SERVER: JobConfig(
required_builds=[BuildNames.BINARY_RELEASE],
run_by_labels=[Labels.JEPSEN_TEST],
run_command="jepsen_check.py server",
runner_type=Runners.STYLE_CHECKER_ARM,
runner_type=Runners.STYLE_CHECKER_AARCH64,
),
JobNames.PERFORMANCE_TEST_AMD64: CommonJobConfigs.PERF_TESTS.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4
),
JobNames.PERFORMANCE_TEST_ARM64: CommonJobConfigs.PERF_TESTS.with_properties(
JobNames.PERFORMANCE_TEST_AARCH64: CommonJobConfigs.PERF_TESTS.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
num_batches=4,
run_by_labels=[Labels.PR_PERFORMANCE],
runner_type=Runners.FUNC_TESTER_ARM,
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.SQLANCER: CommonJobConfigs.SQLLANCER_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE],
@ -532,9 +532,9 @@ class CI:
JobNames.CLICKBENCH_TEST: CommonJobConfigs.CLICKBENCH_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE],
),
JobNames.CLICKBENCH_TEST_ARM: CommonJobConfigs.CLICKBENCH_TEST.with_properties(
JobNames.CLICKBENCH_TEST_AARCH64: CommonJobConfigs.CLICKBENCH_TEST.with_properties(
required_builds=[BuildNames.PACKAGE_AARCH64],
runner_type=Runners.FUNC_TESTER_ARM,
runner_type=Runners.FUNC_TESTER_AARCH64,
),
JobNames.LIBFUZZER_TEST: JobConfig(
required_builds=[BuildNames.FUZZERS],
@ -572,7 +572,7 @@ class CI:
),
JobNames.STYLE_CHECK: JobConfig(
run_always=True,
runner_type=Runners.STYLE_CHECKER_ARM,
runner_type=Runners.STYLE_CHECKER_AARCH64,
),
JobNames.BUGFIX_VALIDATE: JobConfig(
run_by_labels=[Labels.PR_BUGFIX, Labels.PR_CRITICAL_BUGFIX],

View File

@ -58,11 +58,11 @@ class Runners(metaclass=WithIter):
"""
BUILDER = "builder"
BUILDER_ARM = "builder-aarch64"
BUILDER_AARCH64 = "builder-aarch64"
STYLE_CHECKER = "style-checker"
STYLE_CHECKER_ARM = "style-checker-aarch64"
STYLE_CHECKER_AARCH64 = "style-checker-aarch64"
FUNC_TESTER = "func-tester"
FUNC_TESTER_ARM = "func-tester-aarch64"
FUNC_TESTER_AARCH64 = "func-tester-aarch64"
FUZZER_UNIT_TESTER = "fuzzer-unit-tester"
@ -78,7 +78,7 @@ class Tags(metaclass=WithIter):
# to upload all binaries from build jobs
UPLOAD_ALL_ARTIFACTS = "upload_all"
CI_SET_SYNC = "ci_set_sync"
CI_SET_ARM = "ci_set_arm"
CI_SET_AARCH64 = "ci_set_aarch64"
CI_SET_REQUIRED = "ci_set_required"
CI_SET_BUILDS = "ci_set_builds"
@ -106,7 +106,7 @@ class BuildNames(metaclass=WithIter):
PACKAGE_MSAN = "package_msan"
PACKAGE_DEBUG = "package_debug"
PACKAGE_AARCH64 = "package_aarch64"
PACKAGE_ARM_ASAN = "package_aarch64_asan"
PACKAGE_AARCH64_ASAN = "package_aarch64_asan"
PACKAGE_RELEASE_COVERAGE = "package_release_coverage"
BINARY_RELEASE = "binary_release"
BINARY_TIDY = "binary_tidy"
@ -134,14 +134,14 @@ class JobNames(metaclass=WithIter):
DOCKER_SERVER = "Docker server image"
DOCKER_KEEPER = "Docker keeper image"
INSTALL_TEST_AMD = "Install packages (release)"
INSTALL_TEST_ARM = "Install packages (aarch64)"
INSTALL_TEST_AARCH64 = "Install packages (aarch64)"
STATELESS_TEST_DEBUG = "Stateless tests (debug)"
STATELESS_TEST_RELEASE = "Stateless tests (release)"
STATELESS_TEST_RELEASE_COVERAGE = "Stateless tests (coverage)"
STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)"
STATELESS_TEST_ASAN = "Stateless tests (asan)"
STATELESS_TEST_ARM_ASAN = "Stateless tests (aarch64, asan)"
STATELESS_TEST_AARCH64_ASAN = "Stateless tests (aarch64, asan)"
STATELESS_TEST_TSAN = "Stateless tests (tsan)"
STATELESS_TEST_MSAN = "Stateless tests (msan)"
STATELESS_TEST_UBSAN = "Stateless tests (ubsan)"
@ -158,7 +158,7 @@ class JobNames(metaclass=WithIter):
STATEFUL_TEST_RELEASE_COVERAGE = "Stateful tests (coverage)"
STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)"
STATEFUL_TEST_ASAN = "Stateful tests (asan)"
STATEFUL_TEST_ARM_ASAN = "Stateful tests (aarch64, asan)"
STATEFUL_TEST_AARCH64_ASAN = "Stateful tests (aarch64, asan)"
STATEFUL_TEST_TSAN = "Stateful tests (tsan)"
STATEFUL_TEST_MSAN = "Stateful tests (msan)"
STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)"
@ -181,7 +181,7 @@ class JobNames(metaclass=WithIter):
INTEGRATION_TEST_ASAN = "Integration tests (asan)"
INTEGRATION_TEST_ASAN_OLD_ANALYZER = "Integration tests (asan, old analyzer)"
INTEGRATION_TEST_TSAN = "Integration tests (tsan)"
INTEGRATION_TEST_ARM = "Integration tests (aarch64)"
INTEGRATION_TEST_AARCH64 = "Integration tests (aarch64)"
INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)"
UPGRADE_TEST_DEBUG = "Upgrade check (debug)"
@ -205,7 +205,7 @@ class JobNames(metaclass=WithIter):
JEPSEN_SERVER = "ClickHouse Server Jepsen"
PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)"
PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)"
PERFORMANCE_TEST_AARCH64 = "Performance Comparison (aarch64)"
# SQL_LOGIC_TEST = "Sqllogic test (release)"
@ -214,10 +214,10 @@ class JobNames(metaclass=WithIter):
SQLTEST = "SQLTest"
COMPATIBILITY_TEST = "Compatibility check (release)"
COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)"
COMPATIBILITY_TEST_AARCH64 = "Compatibility check (aarch64)"
CLICKBENCH_TEST = "ClickBench (release)"
CLICKBENCH_TEST_ARM = "ClickBench (aarch64)"
CLICKBENCH_TEST_AARCH64 = "ClickBench (aarch64)"
LIBFUZZER_TEST = "libFuzzer tests"
@ -387,7 +387,7 @@ class CommonJobConfigs:
"./tests/ci/upload_result_helper.py",
],
),
runner_type=Runners.STYLE_CHECKER_ARM,
runner_type=Runners.STYLE_CHECKER_AARCH64,
disable_await=True,
)
COMPATIBILITY_TEST = JobConfig(
@ -634,8 +634,8 @@ REQUIRED_CHECKS = [
JobNames.STATEFUL_TEST_RELEASE,
JobNames.STATELESS_TEST_RELEASE,
JobNames.STATELESS_TEST_ASAN,
JobNames.STATELESS_TEST_ARM_ASAN,
JobNames.STATEFUL_TEST_ARM_ASAN,
JobNames.STATELESS_TEST_AARCH64_ASAN,
JobNames.STATEFUL_TEST_AARCH64_ASAN,
JobNames.STATELESS_TEST_FLAKY_ASAN,
JobNames.STATEFUL_TEST_ASAN,
JobNames.STYLE_CHECK,

View File

@ -131,7 +131,7 @@ def main():
check_name = args.check_name or os.getenv("CHECK_NAME")
assert check_name
check_glibc = True
# currently hardcoded to x86, don't enable for ARM
# currently hardcoded to x86, don't enable for AARCH64
check_distributions = (
"aarch64" not in check_name.lower() and "arm64" not in check_name.lower()
)

View File

@ -36,11 +36,12 @@ class TestCIConfig(unittest.TestCase):
elif "binary_" in job.lower() or "package_" in job.lower():
if job.lower() in (
CI.BuildNames.PACKAGE_AARCH64,
CI.BuildNames.PACKAGE_ARM_ASAN,
CI.BuildNames.PACKAGE_AARCH64_ASAN,
):
self.assertTrue(
CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER_ARM,),
f"Job [{job}] must have [{CI.Runners.BUILDER_ARM}] runner",
CI.JOB_CONFIGS[job].runner_type
in (CI.Runners.BUILDER_AARCH64,),
f"Job [{job}] must have [{CI.Runners.BUILDER_AARCH64}] runner",
)
else:
self.assertTrue(
@ -96,7 +97,7 @@ class TestCIConfig(unittest.TestCase):
else:
self.assertTrue(CI.JOB_CONFIGS[job].build_config is None)
if "asan" in job and "aarch" in job:
expected_builds = [CI.BuildNames.PACKAGE_ARM_ASAN]
expected_builds = [CI.BuildNames.PACKAGE_AARCH64_ASAN]
elif "asan" in job:
expected_builds = [CI.BuildNames.PACKAGE_ASAN]
elif "msan" in job:

View File

@ -10,7 +10,7 @@ from ci_settings import CiSettings
_TEST_BODY_1 = """
#### Run only:
- [ ] <!---ci_set_required--> Some Set
- [x] <!---ci_set_arm--> Integration tests (arm64)
- [x] <!---ci_set_aarch64--> Integration tests (aarch64)
- [x] <!---ci_include_foo--> Integration tests
- [x] <!---ci_include_foo_Bar--> Integration tests
- [ ] <!---ci_include_bar--> Integration tests
@ -150,7 +150,7 @@ class TestCIOptions(unittest.TestCase):
self.assertFalse(ci_options.no_ci_cache)
self.assertTrue(ci_options.no_merge_commit)
self.assertTrue(ci_options.woolen_wolfdog)
self.assertEqual(ci_options.ci_sets, ["ci_set_arm"])
self.assertEqual(ci_options.ci_sets, ["ci_set_aarch64"])
self.assertCountEqual(ci_options.include_keywords, ["foo", "foo_bar"])
self.assertCountEqual(ci_options.exclude_keywords, ["foo", "foo_bar"])

View File

@ -789,7 +789,6 @@ def get_localzone():
return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:]))
# Refer to `tests/integration/helpers/random_settings.py` for integration test random settings
class SettingsRandomizer:
settings = {
"max_insert_threads": lambda: (
@ -920,9 +919,6 @@ class SettingsRandomizer:
"max_parsing_threads": lambda: random.choice([0, 1, 10]),
"optimize_functions_to_subcolumns": lambda: random.randint(0, 1),
"parallel_replicas_local_plan": lambda: random.randint(0, 1),
"query_plan_join_inner_table_selection": lambda: random.choice(
["left", "auto", "right"]
),
"output_format_native_write_json_as_string": lambda: random.randint(0, 1),
"enable_vertical_final": lambda: random.randint(0, 1),
}

View File

@ -538,6 +538,7 @@
"WITH ADMIN OPTION"
"WITH CHECK"
"WITH FILL"
"STALENESS"
"WITH GRANT OPTION"
"WITH NAME"
"WITH REPLACE OPTION"

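STALENESS is the WITH FILL modifier added to the fuzzer dictionary above. A sketch of its use, assuming a table t with a numeric key column:

SELECT key FROM t ORDER BY key WITH FILL STALENESS 3;

Roughly, gap rows are generated only while they stay within 3 units of the preceding original row; this is the feature behind the running_with_staleness flag in FillingTransform earlier in this diff.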
View File

@ -67,7 +67,6 @@ DEFAULT_ENV_NAME = ".env"
DEFAULT_BASE_CONFIG_DIR = os.environ.get(
"CLICKHOUSE_TESTS_BASE_CONFIG_DIR", "/etc/clickhouse-server/"
)
DOCKER_BASE_TAG = os.environ.get("DOCKER_BASE_TAG", "latest")
SANITIZER_SIGN = "=================="
@ -504,6 +503,7 @@ class ClickHouseCluster:
"CLICKHOUSE_TESTS_DOCKERD_HOST"
)
self.docker_api_version = os.environ.get("DOCKER_API_VERSION")
self.docker_base_tag = os.environ.get("DOCKER_BASE_TAG", "latest")
self.base_cmd = ["docker", "compose"]
if custom_dockerd_host:
@ -1079,7 +1079,7 @@ class ClickHouseCluster:
env_variables["keeper_binary"] = binary_path
env_variables["keeper_cmd_prefix"] = keeper_cmd_prefix
env_variables["image"] = "clickhouse/integration-test:" + DOCKER_BASE_TAG
env_variables["image"] = "clickhouse/integration-test:" + self.docker_base_tag
env_variables["user"] = str(os.getuid())
env_variables["keeper_fs"] = "bind"
for i in range(1, 4):
@ -1653,6 +1653,7 @@ class ClickHouseCluster:
copy_common_configs=True,
config_root_name="clickhouse",
extra_configs=[],
extra_args="",
randomize_settings=True,
) -> "ClickHouseInstance":
"""Add an instance to the cluster.
@ -1675,7 +1676,7 @@ class ClickHouseCluster:
)
if tag is None:
tag = DOCKER_BASE_TAG
tag = self.docker_base_tag
if not env_variables:
env_variables = {}
self.use_keeper = use_keeper
@ -1740,6 +1741,7 @@ class ClickHouseCluster:
with_postgres_cluster=with_postgres_cluster,
with_postgresql_java_client=with_postgresql_java_client,
clickhouse_start_command=clickhouse_start_command,
clickhouse_start_extra_args=extra_args,
main_config_name=main_config_name,
users_config_name=users_config_name,
copy_common_configs=copy_common_configs,
@ -3368,6 +3370,7 @@ class ClickHouseInstance:
with_postgres_cluster,
with_postgresql_java_client,
clickhouse_start_command=CLICKHOUSE_START_COMMAND,
clickhouse_start_extra_args="",
main_config_name="config.xml",
users_config_name="users.xml",
copy_common_configs=True,
@ -3463,11 +3466,18 @@ class ClickHouseInstance:
self.users_config_name = users_config_name
self.copy_common_configs = copy_common_configs
self.clickhouse_start_command = clickhouse_start_command.replace(
clickhouse_start_command_with_conf = clickhouse_start_command.replace(
"{main_config_file}", self.main_config_name
)
self.clickhouse_stay_alive_command = "bash -c \"trap 'pkill tail' INT TERM; {} --daemon; coproc tail -f /dev/null; wait $$!\"".format(
clickhouse_start_command
self.clickhouse_start_command = "{} -- {}".format(
clickhouse_start_command_with_conf, clickhouse_start_extra_args
)
self.clickhouse_start_command_in_daemon = "{} --daemon -- {}".format(
clickhouse_start_command_with_conf, clickhouse_start_extra_args
)
self.clickhouse_stay_alive_command = "bash -c \"trap 'pkill tail' INT TERM; {}; coproc tail -f /dev/null; wait $$!\"".format(
self.clickhouse_start_command_in_daemon
)
self.path = p.join(self.cluster.instances_dir, name)
@ -3910,7 +3920,7 @@ class ClickHouseInstance:
if pid is None:
logging.debug("No clickhouse process running. Start new one.")
self.exec_in_container(
["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)],
["bash", "-c", self.clickhouse_start_command_in_daemon],
user=str(os.getuid()),
)
if expected_to_fail:
@ -4230,7 +4240,7 @@ class ClickHouseInstance:
user="root",
)
self.exec_in_container(
["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)],
["bash", "-c", self.clickhouse_start_command_in_daemon],
user=str(os.getuid()),
)
@ -4311,7 +4321,7 @@ class ClickHouseInstance:
]
)
self.exec_in_container(
["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)],
["bash", "-c", self.clickhouse_start_command_in_daemon],
user=str(os.getuid()),
)
@ -4538,12 +4548,7 @@ class ClickHouseInstance:
if len(self.custom_dictionaries_paths):
write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir)
if (
self.randomize_settings
and self.image == "clickhouse/integration-test"
and self.tag == DOCKER_BASE_TAG
and self.base_config_dir == DEFAULT_BASE_CONFIG_DIR
):
if self.randomize_settings and self.base_config_dir == DEFAULT_BASE_CONFIG_DIR:
# If custom main config is used, do not apply random settings to it
write_random_settings_config(Path(users_d_dir) / "0_random_settings.xml")
@ -4704,9 +4709,7 @@ class ClickHouseInstance:
entrypoint_cmd = self.clickhouse_start_command
if self.stay_alive:
entrypoint_cmd = self.clickhouse_stay_alive_command.replace(
"{main_config_file}", self.main_config_name
)
entrypoint_cmd = self.clickhouse_stay_alive_command
else:
entrypoint_cmd = (
"["

View File

@ -5,8 +5,6 @@ def randomize_settings():
yield "max_joined_block_size_rows", random.randint(8000, 100000)
if random.random() < 0.5:
yield "max_block_size", random.randint(8000, 100000)
if random.random() < 0.5:
yield "query_plan_join_inner_table_selection", random.choice(["auto", "left"])
def write_random_settings_config(destination):

View File

@ -91,7 +91,7 @@ def test_clickhouse_client_max_peak_memory_usage_distributed(started_cluster):
with client(name="client1>", log=client_output, command=command_text) as client1:
client1.expect(prompt)
client1.send(
"SELECT COUNT(*) FROM distributed_fixed_numbers JOIN fixed_numbers_2 ON distributed_fixed_numbers.number=fixed_numbers_2.number SETTINGS query_plan_join_inner_table_selection = 'right'",
"SELECT COUNT(*) FROM distributed_fixed_numbers JOIN fixed_numbers_2 ON distributed_fixed_numbers.number=fixed_numbers_2.number",
)
client1.expect("Peak memory usage", timeout=60)
client1.expect(prompt)

View File

@ -65,12 +65,24 @@ CREATE TABLE test_s3(c1 Int8, c2 Date) ENGINE = ReplicatedMergeTree('/test/table
objects_after = get_objects_in_data_path()
assert objects_before == objects_after
node1.query("DROP TABLE test_local SYNC")
node1.query("DROP TABLE test_s3 SYNC")
def test_drop_complex_columns(started_cluster):
node1 = cluster.instances["node1"]
node1.query(
"""
CREATE TABLE warming_up(
id Int8
) ENGINE = MergeTree
order by (id) SETTINGS storage_policy = 's3';"""
)
# Now we are sure that s3 storage is up and running
start_objects = get_objects_in_data_path()
print("Objects before", start_objects)
node1 = cluster.instances["node1"]
node1.query(
"""
CREATE TABLE test_s3_complex_types(
@ -104,3 +116,4 @@ vertical_merge_algorithm_min_columns_to_activate=1;"""
end_objects = get_objects_in_data_path()
print("Objects after drop", end_objects)
assert start_objects == end_objects
node1.query("DROP TABLE warming_up SYNC")

View File

@ -15,9 +15,9 @@ INSERT INTO t2_00826 values (1,1), (1,2);
INSERT INTO t2_00826 (a) values (2), (3);
SELECT '--- cross ---';
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a ORDER BY ALL;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a;
SELECT '--- cross nullable ---';
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.b ORDER BY ALL;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.b;
SELECT '--- cross nullable vs not nullable ---';
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.b ORDER BY t1_00826.a;
SELECT '--- cross self ---';
@ -41,15 +41,14 @@ SELECT '--- is null or ---';
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.a AND (t2_00826.b IS NULL OR t2_00826.b > t2_00826.a) ORDER BY t1_00826.a;
SELECT '--- do not rewrite alias ---';
SELECT a as b FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.a AND b > 0 ORDER BY ALL;
SELECT a as b FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.a AND b > 0;
SELECT '--- comma ---';
SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a ORDER BY ALL;
SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a;
SELECT '--- comma nullable ---';
SELECT * FROM t1_00826, t2_00826 where t1_00826.b = t2_00826.b ORDER BY ALL;
SELECT * FROM t1_00826, t2_00826 where t1_00826.b = t2_00826.b;
SELECT '--- comma and or ---';
SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a AND (t2_00826.b IS NULL OR t2_00826.b < 2)
ORDER BY ALL;
SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a AND (t2_00826.b IS NULL OR t2_00826.b < 2);
SELECT '--- cross ---';

View File

@ -20,42 +20,42 @@ select t.a, s.b, s.a, s.b, y.a, y.b from t
left join s on (t.a = s.a and s.b = t.b)
left join y on (y.a = s.a and y.b = s.b)
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
select t.a as t_a from t
left join s on s.a = t_a
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
select t.a, s.a as s_a from t
left join s on s.a = t.a
left join y on y.b = s.b
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
select t.a, t.a, t.b as t_b from t
left join s on t.a = s.a
left join y on y.b = s.b
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
select s.a, s.a, s.b as s_b, s.b from t
left join s on s.a = t.a
left join y on s.b = y.b
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
select y.a, y.a, y.b as y_b, y.b from t
left join s on s.a = t.a
left join y on y.b = s.b
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
select t.a, t.a as t_a, s.a, s.a as s_a, y.a, y.a as y_a from t
left join s on t.a = s.a
left join y on y.b = s.b
order by t.a
format PrettyCompactMonoBlock;
format PrettyCompactNoEscapes;
drop table t;
drop table s;

View File

@ -1,7 +1,5 @@
SET joined_subquery_requires_alias = 0;
SET query_plan_join_inner_table_selection = 'auto';
{% for join_algorithm in ['partial_merge', 'hash'] -%}
SET join_algorithm = '{{ join_algorithm }}';

View File

@ -18,35 +18,28 @@
0 0
0 0
-
1 1
1 1
0 0
-
1 1
1 1
0 0
-
1 1
1 1
0 0
-
1 1
1 1
0 0
-
1 1
0 0
-
1 foo 1 1 300
0 foo 1 0 300
-
1 100 1970-01-01 1 100 1970-01-01
1 100 1970-01-01 1 200 1970-01-02
1 200 1970-01-02 1 100 1970-01-01

View File

@ -64,47 +64,39 @@ USING (id);
INSERT INTO t VALUES (1, 100, '1970-01-01'), (1, 200, '1970-01-02');
SELECT '-';
SELECT *
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
LEFT JOIN (SELECT item_id FROM t ) r
ON l.item_id = r.item_id;
SELECT '-';
SELECT *
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
RIGHT JOIN (SELECT item_id FROM t ) r
ON l.item_id = r.item_id;
SELECT '-';
SELECT *
FROM (SELECT item_id FROM t) l
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT '-';
SELECT *
FROM (SELECT item_id FROM t) l
RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT '-';
SELECT *
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT '-';
SELECT *
FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT '-';
SELECT *
FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date) l
LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date ) r
ON l.item_id = r.item_id
ORDER BY ALL;
ON l.item_id = r.item_id;
DROP TABLE t;

View File

@ -26,7 +26,7 @@ Expression ((Projection + Before ORDER BY))
Parts: 1/1
Granules: 1/1
Expression ((Project names + Projection))
Filter (WHERE)
Filter ((WHERE + DROP unused columns after JOIN))
Join (JOIN FillRightFirst)
Expression
ReadFromMergeTree (default.t1)

View File

@ -75,7 +75,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverErro
SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 }
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 }
SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 }
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id ORDER BY ALL;
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id;
-- non-equi conditions containing columns from different tables aren't supported yet
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id >= t2.id; -- { serverError 403 }
SELECT * FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.id >= length(t2.key); -- { serverError 403 }
@ -89,10 +89,10 @@ SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and (t1.id == t22.id OR t22
SELECT 't22', * FROM t1 JOIN t22 ON (t22.key == t22.key2 OR t1.id == t22.id) and t1.id == t22.idd; -- { serverError 403 }
SELECT 't22', * FROM t1 JOIN t22 ON (t1.id == t22.id OR t22.key == t22.key2) and t1.id == t22.idd; -- { serverError 403 }
SELECT 't22', * FROM t1 JOIN t22 ON (t1.id == t22.id OR t22.key == t22.key2) and (t1.id == t22.idd AND (t1.key2 = 'a1' OR t1.key2 = 'a2' OR t1.key2 = 'a3' OR t1.key2 = 'a4' OR t1.key2 = 'a5' OR t1.key2 = 'a6' OR t1.key2 = 'a7' OR t1.key2 = 'a8' OR t1.key2 = 'a9' OR t1.key2 = 'a10' OR t1.key2 = 'a11' OR t1.key2 = 'a12' OR t1.key2 = 'a13' OR t1.key2 = 'a14' OR t1.key2 = 'a15' OR t1.key2 = 'a16' OR t1.key2 = 'a17' OR t1.key2 = 'a18' OR t1.key2 = 'a19' OR t1.key2 = '111')); -- { serverError 403 }
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and t22.key == t22.key2 OR t1.id == t22.idd and t1.id == t22.id ORDER BY ALL;
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and t1.id == t22.id OR t1.id == t22.idd and t22.key == t22.key2 ORDER BY ALL;
SELECT 't22', * FROM t1 JOIN t22 ON t22.key == t22.key2 and t1.id == t22.idd OR t1.id == t22.id and t1.id == t22.idd ORDER BY ALL;
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.id and t1.id == t22.idd OR t22.key == t22.key2 and t1.id == t22.idd ORDER BY ALL;
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and t22.key == t22.key2 OR t1.id == t22.idd and t1.id == t22.id;
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and t1.id == t22.id OR t1.id == t22.idd and t22.key == t22.key2;
SELECT 't22', * FROM t1 JOIN t22 ON t22.key == t22.key2 and t1.id == t22.idd OR t1.id == t22.id and t1.id == t22.idd;
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.id and t1.id == t22.idd OR t22.key == t22.key2 and t1.id == t22.idd;
{% endfor -%}

View File

@ -33,23 +33,23 @@
2 2
2 2
-- { echoOn }
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 1 ORDER BY 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 1 SETTINGS enable_analyzer = 1;
1 0
2 2
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 1 ORDER BY 1 SETTINGS enable_analyzer = 1;
0 3
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 1 SETTINGS enable_analyzer = 1;
2 2
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 1 ORDER BY 2, 1 SETTINGS enable_analyzer = 1;
0 3
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 1 SETTINGS enable_analyzer = 1;
1 0
2 2
0 3
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 2 ORDER BY 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 2 SETTINGS enable_analyzer = 1;
1 0
2 0
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 2 ORDER BY 2 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 2 SETTINGS enable_analyzer = 1;
0 2
0 3
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 2 ORDER BY 2, 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 2 SETTINGS enable_analyzer = 1;
1 0
2 0
0 2
@ -59,11 +59,11 @@ SELECT * FROM (SELECT 1 as a) as t1 LEFT JOIN ( SELECT ('b', 256) as b ) AS t2
1 ('',0)
SELECT * FROM (SELECT 1 as a) as t1 RIGHT JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
0 ('b',256)
SELECT * FROM (SELECT 1 as a) as t1 FULL JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL ORDER BY 2;
SELECT * FROM (SELECT 1 as a) as t1 FULL JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
1 ('',0)
0 ('b',256)
SELECT * FROM (SELECT 1 as a) as t1 SEMI JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
SELECT * FROM (SELECT 1 as a) as t1 ANTI JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL ORDER BY 2;
SELECT * FROM (SELECT 1 as a) as t1 ANTI JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
1 ('',0)
2
4 2 Nullable(UInt64) UInt8

View File

@ -73,20 +73,20 @@ SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 SETTINGS enable_analyzer = 0; --
SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 SETTINGS enable_analyzer = 1;
-- { echoOn }
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 1 ORDER BY 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 1 ORDER BY 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 1 ORDER BY 2, 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 2 ORDER BY 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 2 ORDER BY 2 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 2 ORDER BY 2, 1 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND 1 = 2 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id AND 1 = 2 SETTINGS enable_analyzer = 1;
SELECT * FROM t1 FULL JOIN t2 ON t1.id = t2.id AND 1 = 2 SETTINGS enable_analyzer = 1;
SELECT * FROM (SELECT 1 as a) as t1 INNER JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
SELECT * FROM (SELECT 1 as a) as t1 LEFT JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
SELECT * FROM (SELECT 1 as a) as t1 RIGHT JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
SELECT * FROM (SELECT 1 as a) as t1 FULL JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL ORDER BY 2;
SELECT * FROM (SELECT 1 as a) as t1 FULL JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
SELECT * FROM (SELECT 1 as a) as t1 SEMI JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
SELECT * FROM (SELECT 1 as a) as t1 ANTI JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL ORDER BY 2;
SELECT * FROM (SELECT 1 as a) as t1 ANTI JOIN ( SELECT ('b', 256) as b ) AS t2 ON NULL;
-- { echoOff }

View File

@ -1,8 +1,8 @@
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (id Int) ENGINE = TinyLog;
CREATE TABLE t2 (id Int) ENGINE = TinyLog;
CREATE TABLE t1 (id Int) ENGINE = MergeTree ORDER BY id;
CREATE TABLE t2 (id Int) ENGINE = MergeTree ORDER BY id;
INSERT INTO t1 VALUES (1), (2);
INSERT INTO t2 SELECT number + 5 AS x FROM (SELECT * FROM system.numbers LIMIT 1111);

View File

@ -12,9 +12,8 @@ CREATE TABLE without_nullable
insert into with_nullable values(0,'f'),(0,'usa');
insert into without_nullable values(0,'usa'),(0,'us2a');
select if(t0.country is null ,t2.country,t0.country) "country"
from without_nullable t0 right outer join with_nullable t2 on t0.country=t2.country
ORDER BY 1 DESC;
select if(t0.country is null ,t2.country,t0.country) "country"
from without_nullable t0 right outer join with_nullable t2 on t0.country=t2.country;
drop table with_nullable;
drop table without_nullable;

View File

@ -48,8 +48,7 @@ SELECT
L2SquaredDistance(v1.v, v2.v),
cosineDistance(v1.v, v2.v)
FROM vec2 v1, vec2 v2
WHERE length(v1.v) == length(v2.v)
ORDER BY ALL;
WHERE length(v1.v) == length(v2.v);
INSERT INTO vec2f VALUES (1, [100, 200, 0]), (2, [888, 777, 666]), (3, range(1, 35, 1)), (4, range(3, 37, 1)), (5, range(1, 135, 1)), (6, range(3, 137, 1));
SELECT
@ -62,8 +61,7 @@ SELECT
L2SquaredDistance(v1.v, v2.v),
cosineDistance(v1.v, v2.v)
FROM vec2f v1, vec2f v2
WHERE length(v1.v) == length(v2.v)
ORDER BY ALL;
WHERE length(v1.v) == length(v2.v);
INSERT INTO vec2d VALUES (1, [100, 200, 0]), (2, [888, 777, 666]), (3, range(1, 35, 1)), (4, range(3, 37, 1)), (5, range(1, 135, 1)), (6, range(3, 137, 1));
SELECT
@ -76,8 +74,7 @@ SELECT
L2SquaredDistance(v1.v, v2.v),
cosineDistance(v1.v, v2.v)
FROM vec2d v1, vec2d v2
WHERE length(v1.v) == length(v2.v)
ORDER BY ALL;
WHERE length(v1.v) == length(v2.v);
SELECT
v1.id,
@ -89,8 +86,7 @@ SELECT
L2SquaredDistance(v1.v, v2.v),
cosineDistance(v1.v, v2.v)
FROM vec2f v1, vec2d v2
WHERE length(v1.v) == length(v2.v)
ORDER BY ALL;
WHERE length(v1.v) == length(v2.v);
SELECT L1Distance([0, 0], [1]); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH }
SELECT L2Distance([1, 2], (3,4)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@ -0,0 +1,13 @@
-- Tags: no-fasttest, no-ordinary-database
SET allow_experimental_vector_similarity_index = 1;
-- Issue #52258: Vector similarity indexes must reject empty Arrays or Arrays with default values
DROP TABLE IF EXISTS tab;
CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree() ORDER BY id;
INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA }
INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA }
DROP TABLE tab;

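For contrast with the rejected inserts above, a row the index accepts (the values are arbitrary):

INSERT INTO tab VALUES (1, [1.0, 2.0, 3.0]);

A non-empty, non-default vector passes the INCORRECT_DATA checks.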
Some files were not shown because too many files have changed in this diff.