Merge branch 'master' into hive_style_partitioning

This commit is contained in:
Yarik Briukhovetskyi 2024-08-12 11:44:45 +02:00 committed by GitHub
commit 8bc89ac8df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
104 changed files with 3397 additions and 1334 deletions

View File

@ -187,14 +187,6 @@ else ()
set(NO_WHOLE_ARCHIVE --no-whole-archive)
endif ()
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# Can be lld or ld-lld or lld-13 or /path/to/lld.
if (LINKER_NAME MATCHES "lld")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
message (STATUS "Adding .gdb-index via --gdb-index linker option.")
endif ()
endif()
if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE)
AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
@ -402,7 +394,7 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set(ENABLE_GWP_ASAN OFF)
endif ()
option (ENABLE_FIU "Enable Fiu" ON)
option (ENABLE_LIBFIU "Enable libfiu" ON)
option(WERROR "Enable -Werror compiler option" ON)

View File

@ -179,7 +179,7 @@ else()
message(STATUS "Not using QPL")
endif ()
if (OS_LINUX AND ARCH_AMD64)
if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER)
option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES})
elseif(ENABLE_QATLIB)
message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64")

View File

@ -27,7 +27,7 @@ if (ENABLE_QAT_OUT_OF_TREE_BUILD)
${QAT_AL_INCLUDE_DIR}
${QAT_USDM_INCLUDE_DIR}
${ZSTD_LIBRARY_DIR})
target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC)
target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0)
add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
else () # In-tree build
message(STATUS "Intel QATZSTD in-tree build")
@ -78,7 +78,7 @@ else () # In-tree build
${QAT_USDM_INCLUDE_DIR}
${ZSTD_LIBRARY_DIR}
${LIBQAT_HEADER_DIR})
target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE)
target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE)
target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $<BUILD_INTERFACE:${QATZSTD_SRC_DIR}> $<INSTALL_INTERFACE:include>)
add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
endif ()

View File

@ -1,20 +1,21 @@
if (NOT ENABLE_FIU)
message (STATUS "Not using fiu")
if (NOT ENABLE_LIBFIU)
message (STATUS "Not using libfiu")
return ()
endif ()
set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")
set(LIBFIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")
set(FIU_SOURCES
${FIU_DIR}/libfiu/fiu.c
${FIU_DIR}/libfiu/fiu-rc.c
${FIU_DIR}/libfiu/backtrace.c
${FIU_DIR}/libfiu/wtable.c
set(LIBFIU_SOURCES
${LIBFIU_DIR}/libfiu/fiu.c
${LIBFIU_DIR}/libfiu/fiu-rc.c
${LIBFIU_DIR}/libfiu/backtrace.c
${LIBFIU_DIR}/libfiu/wtable.c
)
set(FIU_HEADERS "${FIU_DIR}/libfiu")
set(LIBFIU_HEADERS "${LIBFIU_DIR}/libfiu")
add_library(_fiu ${FIU_SOURCES})
target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE)
target_include_directories(_fiu PUBLIC ${FIU_HEADERS})
add_library(ch_contrib::fiu ALIAS _fiu)
add_library(_libfiu ${LIBFIU_SOURCES})
target_compile_definitions(_libfiu PUBLIC DUMMY_BACKTRACE)
target_compile_definitions(_libfiu PUBLIC FIU_ENABLE)
target_include_directories(_libfiu PUBLIC ${LIBFIU_HEADERS})
add_library(ch_contrib::libfiu ALIAS _libfiu)

View File

@ -728,10 +728,6 @@ add_library(_qpl STATIC ${LIB_DEPS})
target_include_directories(_qpl
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>)
target_compile_definitions(_qpl
PUBLIC -DENABLE_QPL_COMPRESSION)
target_link_libraries(_qpl
PRIVATE ch_contrib::accel-config)

View File

@ -1,3 +1,5 @@
# docker build -t clickhouse/cctools .
# This is a hack to significantly reduce the build time of the clickhouse/binary-builder
# It's based on the assumption that we don't care of the cctools version so much
# It does not even depend on the clickhouse/fasttest in the `docker/images.json`
@ -30,5 +32,29 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd ../.. \
&& rm -rf cctools-port
#
# GDB
#
# ld from binutils is 2.38, which has the following error:
#
# DWARF error: invalid or unhandled FORM value: 0x23
#
ENV LD=ld.lld-${LLVM_VERSION}
ARG GDB_VERSION=15.1
RUN apt-get update \
&& apt-get install --yes \
libgmp-dev \
libmpfr-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \
&& tar -xvf gdb-$GDB_VERSION.tar.gz \
&& cd gdb-$GDB_VERSION \
&& ./configure --prefix=/opt/gdb \
&& make -j $(nproc) \
&& make install \
&& rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz
FROM scratch
COPY --from=builder /cctools /cctools
COPY --from=builder /opt/gdb /opt/gdb

View File

@ -83,7 +83,7 @@ RUN arch=${TARGETARCH:-amd64} \
# Give suid to gdb to grant it attach permissions
# chmod 777 to make the container user independent
RUN chmod u+s /usr/bin/gdb \
RUN chmod u+s /opt/gdb/bin/gdb \
&& mkdir -p /var/lib/clickhouse \
&& chmod 777 /var/lib/clickhouse

View File

@ -11,7 +11,6 @@ RUN apt-get update \
curl \
default-jre \
g++ \
gdb \
iproute2 \
krb5-user \
libicu-dev \
@ -73,3 +72,6 @@ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg && \
ENV TZ=Etc/UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
ENV PATH="/opt/gdb/bin:${PATH}"

View File

@ -30,7 +30,6 @@ RUN apt-get update \
luajit \
libssl-dev \
libcurl4-openssl-dev \
gdb \
default-jdk \
software-properties-common \
libkrb5-dev \
@ -87,6 +86,8 @@ COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
COPY misc/ /misc/
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
ENV PATH="/opt/gdb/bin:${PATH}"
# Same options as in test/base/Dockerfile
# (in case you need to override them in tests)

View File

@ -9,7 +9,6 @@ RUN apt-get update \
curl \
dmidecode \
g++ \
gdb \
git \
gnuplot \
imagemagick \
@ -42,6 +41,9 @@ RUN pip3 --no-cache-dir install -r requirements.txt
COPY run.sh /
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
ENV PATH="/opt/gdb/bin:${PATH}"
CMD ["bash", "/run.sh"]
# docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison

View File

@ -69,8 +69,8 @@ ENV MAX_RUN_TIME=0
# Unrelated to vars in setup_minio.sh, but should be the same there
# to have the same binaries for local running scenario
ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z
ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z
ARG MINIO_SERVER_VERSION=2024-08-03T04-33-23Z
ARG MINIO_CLIENT_VERSION=2024-07-31T15-58-33Z
ARG TARGETARCH
# Download Minio-related binaries

View File

@ -59,8 +59,8 @@ find_os() {
download_minio() {
local os
local arch
local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
local minio_server_version=${MINIO_SERVER_VERSION:-2024-08-03T04-33-23Z}
local minio_client_version=${MINIO_CLIENT_VERSION:-2024-07-31T15-58-33Z}
os=$(find_os)
arch=$(find_arch)
@ -82,10 +82,10 @@ setup_minio() {
local test_type=$1
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc admin policy attach clickminio readwrite --user=test
./mc mb --ignore-existing clickminio/test
if [ "$test_type" = "stateless" ]; then
./mc policy set public clickminio/test
./mc anonymous set public clickminio/test
fi
}

View File

@ -44,7 +44,6 @@ RUN apt-get update \
bash \
bsdmainutils \
build-essential \
gdb \
git \
gperf \
moreutils \
@ -58,3 +57,6 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY process_functional_tests_result.py /
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
ENV PATH="/opt/gdb/bin:${PATH}"

View File

@ -75,7 +75,7 @@ Data are received by this protocol and written to a [TimeSeries](/en/engines/tab
<my_rule_1>
<url>/write</url>
<handler>
<type>remote_write</type
<type>remote_write</type>
<database>db_name</database>
<table>time_series_table</table>
</handler>
@ -105,7 +105,7 @@ Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series)
<my_rule_1>
<url>/read</url>
<handler>
<type>remote_read</type
<type>remote_read</type>
<database>db_name</database>
<table>time_series_table</table>
</handler>
@ -144,14 +144,14 @@ Multiple protocols can be specified together in one place:
<my_rule_2>
<url>/write</url>
<handler>
<type>remote_write</type
<type>remote_write</type>
<table>db_name.time_series_table</table>
</handler>
</my_rule_2>
<my_rule_3>
<url>/read</url>
<handler>
<type>remote_read</type
<type>remote_read</type>
<table>db_name.time_series_table</table>
</handler>
</my_rule_3>

View File

@ -52,6 +52,48 @@ Result:
└───────────────────────────────┴───────────────┘
```
## LineString
`LineString` is a line stored as an array of points: [Array](array.md)([Point](#point)).
**Example**
Query:
```sql
CREATE TABLE geo_linestring (l LineString) ENGINE = Memory();
INSERT INTO geo_linestring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
SELECT l, toTypeName(l) FROM geo_linestring;
```
Result:
``` text
┌─l─────────────────────────────┬─toTypeName(l)─┐
│ [(0,0),(10,0),(10,10),(0,10)] │ LineString │
└───────────────────────────────┴───────────────┘
```
## MultiLineString
`MultiLineString` is multiple lines stored as an array of `LineString`: [Array](array.md)([LineString](#linestring)).
**Example**
Query:
```sql
CREATE TABLE geo_multilinestring (l MultiLineString) ENGINE = Memory();
INSERT INTO geo_multilinestring VALUES([[(0, 0), (10, 0), (10, 10), (0, 10)], [(1, 1), (2, 2), (3, 3)]]);
SELECT l, toTypeName(l) FROM geo_multilinestring;
```
Result:
``` text
┌─l───────────────────────────────────────────────────┬─toTypeName(l)───┐
│ [[(0,0),(10,0),(10,10),(0,10)],[(1,1),(2,2),(3,3)]] │ MultiLineString │
└─────────────────────────────────────────────────────┴─────────────────┘
```
## Polygon
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes.

View File

@ -11,6 +11,8 @@ Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](.
- POINT
- POLYGON
- MULTIPOLYGON
- LINESTRING
- MULTILINESTRING
**Syntax**
@ -26,12 +28,16 @@ WKT(geo_data)
- [Ring](../../data-types/geo.md#ring)
- [Polygon](../../data-types/geo.md#polygon)
- [MultiPolygon](../../data-types/geo.md#multipolygon)
- [LineString](../../data-types/geo.md#linestring)
- [MultiLineString](../../data-types/geo.md#multilinestring)
**Returned value**
- WKT geometric object `POINT` is returned for a Point.
- WKT geometric object `POLYGON` is returned for a Polygon.
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
- WKT geometric object `LINESTRING` is returned for a LineString.
- WKT geometric object `MULTILINESTRING` is returned for a MultiLineString.
**Examples**
@ -170,6 +176,34 @@ SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)');
[(1,1),(2,2),(3,3),(1,1)]
```
## readWKTMultiLineString
Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format.
### Syntax
```sql
readWKTMultiLineString(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a MultiLineString geometry.
### Returned value
The function returns a ClickHouse internal representation of the multilinestring geometry.
### Example
```sql
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');
```
```response
[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]]
```
## readWKTRing
Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.

File diff suppressed because it is too large Load Diff

View File

@ -186,7 +186,7 @@ Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`.
:::
Clickhouse currently supports `ALL INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`.
ClickHouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`.
**Example**

View File

@ -143,7 +143,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
if (fs::exists(config_path))
{
ConfigProcessor config_processor(config_path, false, true);
ConfigProcessor config_processor(config_path);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);

View File

@ -68,13 +68,19 @@ const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const
WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const
{
/// We need to create new instance of ZooKeeperWithFaultInjection each time a copy a pointer to ZooKeeper client there
zkutil::ZooKeeperPtr current_zookeeper;
{
std::lock_guard lock(zookeeper_mutex);
current_zookeeper = zookeeper;
}
/// We need to create new instance of ZooKeeperWithFaultInjection each time and copy a pointer to ZooKeeper client there
/// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition
/// when the same object is used from multiple threads.
auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance(
settings.keeper_fault_injection_probability,
settings.keeper_fault_injection_seed,
zookeeper,
current_zookeeper,
log->name(),
log);

View File

@ -353,8 +353,8 @@ target_link_libraries(clickhouse_common_io
Poco::Foundation
)
if (TARGET ch_contrib::fiu)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::fiu)
if (TARGET ch_contrib::libfiu)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::libfiu)
endif()
if (TARGET ch_contrib::cpuid)
@ -556,14 +556,13 @@ target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)
if (TARGET ch_contrib::qpl)
dbms_target_link_libraries(PUBLIC ch_contrib::qpl)
target_link_libraries (clickhouse_compression PUBLIC ch_contrib::qpl)
target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config)
endif ()
if (TARGET ch_contrib::accel-config)
dbms_target_link_libraries(PUBLIC ch_contrib::accel-config)
endif ()
if (TARGET ch_contrib::qatzstd_plugin)
if (TARGET ch_contrib::accel-config AND TARGET ch_contrib::qatzstd_plugin)
dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin)
dbms_target_link_libraries(PUBLIC ch_contrib::accel-config)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin)
endif ()

View File

@ -200,8 +200,6 @@ void ClientApplicationBase::init(int argc, char ** argv)
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
("fuzzer-args", po::value<std::string>(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.")
("client_logs_file", po::value<std::string>(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)")
;

View File

@ -7,6 +7,8 @@
#include <condition_variable>
#include <mutex>
#include "config.h"
namespace DB
{
@ -15,7 +17,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
};
#if FIU_ENABLE
#if USE_LIBFIU
static struct InitFiu
{
InitFiu()
@ -135,7 +137,7 @@ void FailPointInjection::pauseFailPoint(const String & fail_point_name)
void FailPointInjection::enableFailPoint(const String & fail_point_name)
{
#if FIU_ENABLE
#if USE_LIBFIU
#define SUB_M(NAME, flags, pause) \
if (fail_point_name == FailPoints::NAME) \
{ \

View File

@ -1,17 +1,16 @@
#pragma once
#include "config.h"
#include <Common/Exception.h>
#include <Core/Types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include "config.h"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
#include <fiu.h>
#include <fiu-control.h>
# include <fiu.h>
# include <fiu-control.h>
#pragma clang diagnostic pop
#include <unordered_map>

View File

@ -237,7 +237,14 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
res->write_fds.emplace(fd, fds.fds_rw[1]);
}
LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
LOG_TRACE(
getLogger(),
"Started shell command '{}' with pid {} and file descriptors: out {}, err {}",
filename,
pid,
res->out.getFD(),
res->err.getFD());
return res;
}

View File

@ -32,6 +32,8 @@
#cmakedefine01 USE_IDNA
#cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_QPL
#cmakedefine01 USE_QATLIB
#cmakedefine01 USE_LIBURING
#cmakedefine01 USE_AVRO
#cmakedefine01 USE_CAPNP
@ -59,7 +61,7 @@
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_PRQL
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_LIBFIU
#cmakedefine01 USE_BCRYPT
#cmakedefine01 USE_LIBARCHIVE
#cmakedefine01 USE_POCKETFFT

View File

@ -1,7 +1,3 @@
#ifdef ENABLE_QPL_COMPRESSION
#include <cstdio>
#include <thread>
#include <Compression/CompressionCodecDeflateQpl.h>
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionInfo.h>
@ -11,6 +7,10 @@
#include <Common/randomSeed.h>
#include <base/scope_guard.h>
#include <base/getPageSize.h>
#include <cstdio>
#include <thread>
#if USE_QPL
#include "libaccel_config.h"

View File

@ -4,6 +4,11 @@
#include <map>
#include <random>
#include <pcg_random.hpp>
#include "config.h"
#if USE_QPL
#include <qpl/qpl.h>
namespace Poco
@ -117,3 +122,4 @@ private:
};
}
#endif

View File

@ -1,4 +1,6 @@
#ifdef ENABLE_ZSTD_QAT_CODEC
#include "config.h"
#if USE_QATLIB
#include <Common/logger_useful.h>
#include <Compression/CompressionCodecZSTD.h>
@ -6,6 +8,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/IAST.h>
#include <Poco/Logger.h>
#include <qatseqprod.h>
#include <zstd.h>

View File

@ -1,20 +1,20 @@
#include "config.h"
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecNone.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Poco/String.h>
#include <IO/ReadBuffer.h>
#include <Parsers/queryToString.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecNone.h>
#include <IO/WriteHelpers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Poco/String.h>
#include <boost/algorithm/string/join.hpp>
#include "config.h"
namespace DB
{
@ -175,11 +175,11 @@ void registerCodecNone(CompressionCodecFactory & factory);
void registerCodecLZ4(CompressionCodecFactory & factory);
void registerCodecLZ4HC(CompressionCodecFactory & factory);
void registerCodecZSTD(CompressionCodecFactory & factory);
#ifdef ENABLE_ZSTD_QAT_CODEC
#if USE_QATLIB
void registerCodecZSTDQAT(CompressionCodecFactory & factory);
#endif
void registerCodecMultiple(CompressionCodecFactory & factory);
#ifdef ENABLE_QPL_COMPRESSION
#if USE_QPL
void registerCodecDeflateQpl(CompressionCodecFactory & factory);
#endif
@ -198,7 +198,7 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecNone(*this);
registerCodecLZ4(*this);
registerCodecZSTD(*this);
#ifdef ENABLE_ZSTD_QAT_CODEC
#if USE_QATLIB
registerCodecZSTDQAT(*this);
#endif
registerCodecLZ4HC(*this);
@ -209,7 +209,7 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecGorilla(*this);
registerCodecEncrypted(*this);
registerCodecFPC(*this);
#ifdef ENABLE_QPL_COMPRESSION
#if USE_QPL
registerCodecDeflateQpl(*this);
#endif
registerCodecGCD(*this);

View File

@ -605,7 +605,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \

View File

@ -85,6 +85,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
{"optimize_functions_to_subcolumns", false, true, "Enabled settings by default"},
}
},
{"24.7",

View File

@ -24,6 +24,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory)
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeLineStringName>()));
});
// Custom type for multiple lines stored as Array(LineString)
factory.registerSimpleDataTypeCustom("MultiLineString", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(LineString)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiLineStringName>()));
});
// Custom type for simple polygon without holes stored as Array(Point)
factory.registerSimpleDataTypeCustom("Ring", []
{

View File

@ -17,6 +17,12 @@ public:
DataTypeLineStringName() : DataTypeCustomFixedName("LineString") {}
};
/// Custom type name descriptor that always reports the fixed name "MultiLineString".
class DataTypeMultiLineStringName : public DataTypeCustomFixedName
{
public:
    DataTypeMultiLineStringName()
        : DataTypeCustomFixedName("MultiLineString")
    {
    }
};
class DataTypeRingName : public DataTypeCustomFixedName
{
public:

View File

@ -305,7 +305,8 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
S3::ListObjectsV2Request request;
request.SetBucket(uri.bucket);
request.SetPrefix(path);
if (path != "/")
request.SetPrefix(path);
if (max_keys)
request.SetMaxKeys(static_cast<int>(max_keys));
else

View File

@ -3,6 +3,7 @@
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Core/Settings.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
@ -35,7 +36,7 @@ FunctionBasePtr createFunctionBaseCast(
class CastOverloadResolverImpl : public IFunctionOverloadResolver
{
public:
const char * getNameImpl() const
static const char * getNameImpl(CastType cast_type, bool internal)
{
if (cast_type == CastType::accurate)
return "accurateCast";
@ -49,7 +50,7 @@ public:
String getName() const override
{
return getNameImpl();
return getNameImpl(cast_type, internal);
}
size_t getNumberOfArguments() const override { return 2; }
@ -79,10 +80,22 @@ public:
}
}
/// Builds the function base for an internal cast of `from` to type `to` directly from the given arguments.
/// The function is created with a null context and with the name that getNameImpl() yields for an internal cast.
static FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional<CastDiagnostic> diagnostic)
{
/// For accurateOrNull the target type is wrapped into Nullable, unless the target is a Variant.
if (cast_type == CastType::accurateOrNull && !isVariant(to))
to = makeNullable(to);
ColumnsWithTypeAndName arguments;
arguments.emplace_back(std::move(from));
/// Second argument: only its type (String) is assigned here.
arguments.emplace_back().type = std::make_unique<DataTypeString>();
return createFunctionBaseCast(nullptr, getNameImpl(cast_type, true), arguments, to, diagnostic, cast_type);
}
protected:
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{
return createFunctionBaseCast(context, getNameImpl(), arguments, return_type, diagnostic, cast_type);
return createFunctionBaseCast(context, getNameImpl(cast_type, internal), arguments, return_type, diagnostic, cast_type);
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
@ -130,9 +143,9 @@ private:
};
FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic)
FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional<CastDiagnostic> diagnostic)
{
return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic);
return CastOverloadResolverImpl::createInternalCast(std::move(from), std::move(to), cast_type, std::move(diagnostic));
}
REGISTER_FUNCTION(CastOverloadResolvers)

View File

@ -3,6 +3,7 @@
#include <memory>
#include <optional>
#include <Interpreters/Context_fwd.h>
#include <Core/ColumnWithTypeAndName.h>
namespace DB
@ -11,6 +12,9 @@ namespace DB
class IFunctionOverloadResolver;
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
class IFunctionBase;
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
enum class CastType : uint8_t
{
nonAccurate,
@ -24,6 +28,6 @@ struct CastDiagnostic
std::string column_to;
};
FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic);
FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional<CastDiagnostic> diagnostic);
}

View File

@ -31,6 +31,9 @@ namespace ErrorCodes
template <typename Point>
using LineString = boost::geometry::model::linestring<Point>;
template <typename Point>
using MultiLineString = boost::geometry::model::multi_linestring<LineString<Point>>;
template <typename Point>
using Ring = boost::geometry::model::ring<Point>;
@ -42,12 +45,14 @@ using MultiPolygon = boost::geometry::model::multi_polygon<Polygon<Point>>;
using CartesianPoint = boost::geometry::model::d2::point_xy<Float64>;
using CartesianLineString = LineString<CartesianPoint>;
using CartesianMultiLineString = MultiLineString<CartesianPoint>;
using CartesianRing = Ring<CartesianPoint>;
using CartesianPolygon = Polygon<CartesianPoint>;
using CartesianMultiPolygon = MultiPolygon<CartesianPoint>;
using SphericalPoint = boost::geometry::model::point<Float64, 2, boost::geometry::cs::spherical_equatorial<boost::geometry::degree>>;
using SphericalLineString = LineString<SphericalPoint>;
using SphericalMultiLineString = MultiLineString<SphericalPoint>;
using SphericalRing = Ring<SphericalPoint>;
using SphericalPolygon = Polygon<SphericalPoint>;
using SphericalMultiPolygon = MultiPolygon<SphericalPoint>;
@ -113,6 +118,28 @@ struct ColumnToLineStringsConverter
}
};
/**
 * Class which converts Column with type Array(Array(Tuple(Float64, Float64))) to a vector of boost multi_linestring type.
 * The result has exactly one multi_linestring per row of the column (one per entry in the array offsets).
 */
template <typename Point>
struct ColumnToMultiLineStringsConverter
{
    /// Converts every row of `col` (cast to ColumnArray) into a MultiLineString<Point>.
    /// Rows that contain no linestrings produce an empty multi_linestring.
    static std::vector<MultiLineString<Point>> convert(ColumnPtr col)
    {
        const auto & column_array = typeid_cast<const ColumnArray &>(*col);
        const IColumn::Offsets & offsets = column_array.getOffsets();

        /// Linestrings of all rows, flattened into one vector; offsets[row] is used as the end index of `row` in it.
        auto all_linestrings = ColumnToLineStringsConverter<Point>::convert(column_array.getDataPtr());

        std::vector<MultiLineString<Point>> answer(offsets.size());
        size_t prev_offset = 0;

        /// Iterate over ALL rows. The row index must not be compared with the number of linestrings:
        /// doing so silently dropped the linestrings of rows that follow empty rows
        /// (e.g. rows [[], [ls]] have 2 rows but only 1 linestring).
        for (size_t row = 0; row < offsets.size(); ++row)
        {
            /// Defensive clamp so that an inconsistent offset can never index past the converted linestrings.
            const size_t row_end = offsets[row] < all_linestrings.size() ? offsets[row] : all_linestrings.size();

            for (size_t linestring_iter = prev_offset; linestring_iter < row_end; ++linestring_iter)
                answer[row].emplace_back(std::move(all_linestrings[linestring_iter]));

            prev_offset = offsets[row];
        }

        return answer;
    }
};
/**
* Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost ring type.
*/
@ -268,6 +295,38 @@ private:
ColumnUInt64::MutablePtr offsets;
};
/// Serializes MultiLineString geometries into an array column.
/// Every added multilinestring forwards its linestrings to the nested LineStringSerializer
/// and appends one cumulative offset (total number of linestrings added so far).
template <typename Point>
class MultiLineStringSerializer
{
public:
    MultiLineStringSerializer()
        : offsets(ColumnUInt64::create())
    {
    }

    /// Starts with an offsets column created with `n` elements.
    explicit MultiLineStringSerializer(size_t n)
        : offsets(ColumnUInt64::create(n))
    {
    }

    /// Appends one multilinestring: first its end offset, then each of its linestrings.
    void add(const MultiLineString<Point> & multilinestring)
    {
        total_linestrings += multilinestring.size();
        offsets->insertValue(total_linestrings);

        for (const auto & linestring : multilinestring)
        {
            linestring_serializer.add(linestring);
        }
    }

    /// Assembles the resulting array column from the nested linestring column and the collected offsets.
    /// The offsets column is moved out.
    ColumnPtr finalize()
    {
        return ColumnArray::create(linestring_serializer.finalize(), std::move(offsets));
    }

private:
    /// Running count of linestrings added so far.
    size_t total_linestrings = 0;
    LineStringSerializer<Point> linestring_serializer;
    ColumnUInt64::MutablePtr offsets;
};
/// Almost the same as LineStringSerializer
/// Serialize Point, Ring as Ring
template <typename Point>
@ -411,6 +470,11 @@ static void callOnGeometryDataType(DataTypePtr type, F && f)
else if (factory.get("LineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "LineString")
return f(ConverterType<ColumnToLineStringsConverter<Point>>());
/// We should take the name into consideration to avoid ambiguity.
/// Because for example both MultiLineString and Polygon are resolved to Array(Array(Point)).
else if (factory.get("MultiLineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "MultiLineString")
return f(ConverterType<ColumnToMultiLineStringsConverter<Point>>());
/// For backward compatibility if we call this function not on a custom type, we will consider Array(Tuple(Point)) as type Ring.
else if (factory.get("Ring")->equals(*type))
return f(ConverterType<ColumnToRingsConverter<Point>>());

View File

@ -75,6 +75,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -73,6 +73,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -73,6 +73,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -77,6 +77,8 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName());
else if constexpr (std::is_same_v<ColumnToLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName());
else if constexpr (std::is_same_v<ColumnToMultiLineStringsConverter<Point>, LeftConverter> || std::is_same_v<ColumnToMultiLineStringsConverter<Point>, RightConverter>)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName());
else
{
auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst());

View File

@ -87,6 +87,11 @@ struct ReadWKTLineStringNameHolder
static constexpr const char * name = "readWKTLineString";
};
/// Name holder for the MultiLineString variant of readWKT; it is passed as the last template
/// argument of FunctionReadWKT when that function is registered.
struct ReadWKTMultiLineStringNameHolder
{
static constexpr const char * name = "readWKTMultiLineString";
};
struct ReadWKTRingNameHolder
{
static constexpr const char * name = "readWKTRing";
@ -131,6 +136,31 @@ Parses a Well-Known Text (WKT) representation of a LineString geometry and retur
},
.categories{"Unique identifiers"}
});
/// Registers readWKTMultiLineString together with its built-in documentation.
/// The second example must call readWKTMultiLineString (not readWKTLineString): its input is a
/// MULTILINESTRING and the documented result type is MultiLineString.
/// NOTE(review): the category below reads "Unique identifiers", which looks unrelated to geometry
/// parsing — confirm the intended category.
factory.registerFunction<FunctionReadWKT<DataTypeMultiLineStringName, CartesianMultiLineString, MultiLineStringSerializer<CartesianPoint>, ReadWKTMultiLineStringNameHolder>>(FunctionDocumentation
{
.description=R"(
Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format.
)",
.syntax = "readWKTMultiLineString(wkt_string)",
.arguments{
{"wkt_string", "The input WKT string representing a MultiLineString geometry."}
},
.returned_value = "The function returns a ClickHouse internal representation of the multilinestring geometry.",
.examples{
{"first call", "SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');", R"(
readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))')
[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]]
)"},
{"second call", "SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));", R"(
toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'))
MultiLineString
)"},
},
.categories{"Unique identifiers"}
});
factory.registerFunction<FunctionReadWKT<DataTypeRingName, CartesianRing, RingSerializer<CartesianPoint>, ReadWKTRingNameHolder>>();
factory.registerFunction<FunctionReadWKT<DataTypePolygonName, CartesianPolygon, PolygonSerializer<CartesianPoint>, ReadWKTPolygonNameHolder>>();
factory.registerFunction<FunctionReadWKT<DataTypeMultiPolygonName, CartesianMultiPolygon, MultiPolygonSerializer<CartesianPoint>, ReadWKTMultiPolygonNameHolder>>();

View File

@ -54,8 +54,7 @@ namespace
}
};
FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
auto func_cast = func_builder_cast->build(cast_args);
auto func_cast = createInternalCast(arguments[0], result_type, CastType::nonAccurate, {});
return func_cast->execute(cast_args, result_type, arguments[0].column->size());
}
};

View File

@ -301,11 +301,11 @@ const ActionsDAG::Node & ActionsDAG::addCast(const Node & node_to_cast, const Da
column.column = DataTypeString().createColumnConst(0, cast_type_constant_value);
column.type = std::make_shared<DataTypeString>();
const auto * cast_type_constant_node = &addColumn(std::move(column));
const auto * cast_type_constant_node = &addColumn(column);
ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node};
FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
auto func_base_cast = createInternalCast(ColumnWithTypeAndName{node_to_cast.result_type, node_to_cast.result_name}, cast_type, CastType::nonAccurate, {});
return addFunction(func_builder_cast, std::move(children), result_name);
return addFunction(func_base_cast, std::move(children), result_name);
}
const ActionsDAG::Node & ActionsDAG::addFunctionImpl(
@ -1547,11 +1547,11 @@ ActionsDAG ActionsDAG::makeConvertingActions(
const auto * left_arg = dst_node;
CastDiagnostic diagnostic = {dst_node->result_name, res_elem.name};
FunctionOverloadResolverPtr func_builder_cast
= createInternalCastOverloadResolver(CastType::nonAccurate, std::move(diagnostic));
ColumnWithTypeAndName left_column{nullptr, dst_node->result_type, {}};
auto func_base_cast = createInternalCast(std::move(left_column), res_elem.type, CastType::nonAccurate, std::move(diagnostic));
NodeRawConstPtrs children = { left_arg, right_arg };
dst_node = &actions_dag.addFunction(func_builder_cast, std::move(children), {});
dst_node = &actions_dag.addFunction(func_base_cast, std::move(children), {});
}
if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column)))

View File

@ -1,4 +1,4 @@
#include <Interpreters/HashJoin/HashJoinMethods.h>
#include <Interpreters/HashJoin/HashJoinMethodsImpl.h>
namespace DB
{

View File

@ -291,12 +291,13 @@ void HashJoin::dataMapInit(MapsVariant & map)
{
if (kind == JoinKind::Cross)
return;
joinDispatchInit(kind, strictness, map);
joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); });
auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr;
joinDispatchInit(kind, strictness, map, prefer_use_maps_all);
joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { map_.create(data->type); });
if (reserve_num)
{
joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); });
joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); });
}
if (!data)
@ -327,9 +328,10 @@ size_t HashJoin::getTotalRowCount() const
}
else
{
auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr;
for (const auto & map : data->maps)
{
joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); });
joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); });
}
}
@ -367,9 +369,10 @@ size_t HashJoin::getTotalByteCount() const
if (data->type != Type::CROSS)
{
auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr;
for (const auto & map : data->maps)
{
joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); });
joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); });
}
}
return res;
@ -520,6 +523,8 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
return true;
}
bool prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr;
size_t total_rows = 0;
size_t total_bytes = 0;
{
@ -592,7 +597,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
bool is_inserted = false;
if (kind != JoinKind::Cross)
{
joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map)
joinDispatch(kind, strictness, data->maps[onexpr_idx], prefer_use_maps_all, [&](auto kind_, auto strictness_, auto & map)
{
size_t size = HashJoinMethods<kind_, strictness_, std::decay_t<decltype(map)>>::insertFromBlockImpl(
*this,
@ -608,10 +613,10 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
is_inserted);
if (flag_per_row)
used_flags->reinit<kind_, strictness_>(stored_block);
used_flags->reinit<kind_, strictness_, std::is_same_v<std::decay_t<decltype(map)>, MapsAll>>(stored_block);
else if (is_inserted)
/// Number of buckets + 1 value from zero storage
used_flags->reinit<kind_, strictness_>(size + 1);
used_flags->reinit<kind_, strictness_, std::is_same_v<std::decay_t<decltype(map)>, MapsAll>>(size + 1);
});
}
@ -873,7 +878,7 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block
keys.insert(std::move(key));
}
static_assert(!MapGetter<JoinKind::Left, JoinStrictness::Any>::flagged,
static_assert(!MapGetter<JoinKind::Left, JoinStrictness::Any, false>::flagged,
"joinGet are not protected from hash table changes between block processing");
std::vector<const MapsOne *> maps_vector;
@ -914,16 +919,34 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
materializeBlockInplace(block);
}
bool prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr;
{
std::vector<const std::decay_t<decltype(data->maps[0])> * > maps_vector;
for (size_t i = 0; i < table_join->getClauses().size(); ++i)
maps_vector.push_back(&data->maps[i]);
if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_)
if (joinDispatch(kind, strictness, maps_vector, prefer_use_maps_all, [&](auto kind_, auto strictness_, auto & maps_vector_)
{
using MapType = typename MapGetter<kind_, strictness_>::Map;
Block remaining_block = HashJoinMethods<kind_, strictness_, MapType>::joinBlockImpl(
*this, block, sample_block_with_columns_to_add, maps_vector_);
Block remaining_block;
if constexpr (std::is_same_v<std::decay_t<decltype(maps_vector_)>, std::vector<const MapsAll *>>)
{
remaining_block = HashJoinMethods<kind_, strictness_, MapsAll>::joinBlockImpl(
*this, block, sample_block_with_columns_to_add, maps_vector_);
}
else if constexpr (std::is_same_v<std::decay_t<decltype(maps_vector_)>, std::vector<const MapsOne *>>)
{
remaining_block = HashJoinMethods<kind_, strictness_, MapsOne>::joinBlockImpl(
*this, block, sample_block_with_columns_to_add, maps_vector_);
}
else if constexpr (std::is_same_v<std::decay_t<decltype(maps_vector_)>, std::vector<const MapsAsof *>>)
{
remaining_block = HashJoinMethods<kind_, strictness_, MapsAsof>::joinBlockImpl(
*this, block, sample_block_with_columns_to_add, maps_vector_);
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown maps type");
}
if (remaining_block.rows())
not_processed = std::make_shared<ExtraBlock>(ExtraBlock{std::move(remaining_block)});
else
@ -1023,7 +1046,8 @@ public:
rows_added = fillColumnsFromMap(map, columns_right);
};
if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback))
/// True when the table join carries a mixed join expression; forwarded to joinDispatch as its fourth argument.
bool prefer_use_maps_all = parent.table_join->getMixedJoinExpression() != nullptr;
/// joinDispatch returning false is reported as a logical error.
if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), prefer_use_maps_all, fill_callback))
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be one of: ANY, ALL, ASOF)", parent.strictness);
}
@ -1220,11 +1244,12 @@ void HashJoin::reuseJoinedData(const HashJoin & join)
if (flag_per_row)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported");
bool prefer_use_maps_all = join.table_join->getMixedJoinExpression() != nullptr;
for (auto & map : data->maps)
{
joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_)
joinDispatch(kind, strictness, map, prefer_use_maps_all, [this](auto kind_, auto strictness_, auto & map_)
{
used_flags->reinit<kind_, strictness_>(map_.getBufferSizeInCells(data->type) + 1);
used_flags->reinit<kind_, strictness_, std::is_same_v<std::decay_t<decltype(map_)>, MapsAll>>(map_.getBufferSizeInCells(data->type) + 1);
});
}
}
@ -1304,7 +1329,9 @@ void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additiona
additional_filter_expression->dumpActions());
}
bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind));
bool is_supported = ((strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)))
|| ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Any || strictness == JoinStrictness::Anti)
&& (isLeft(kind) || isRight(kind))) || (strictness == JoinStrictness::Any && (isInner(kind)));
if (!is_supported)
{
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,

View File

@ -12,15 +12,8 @@
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNSUPPORTED_JOIN_KEYS;
extern const int LOGICAL_ERROR;
}
/// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN.
template <typename HashMap, typename KeyGetter>
struct Inserter
@ -64,7 +57,6 @@ struct Inserter
}
};
/// MapsTemplate is one of MapsOne, MapsAll and MapsAsof
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
class HashJoinMethods
@ -81,27 +73,7 @@ public:
ConstNullMapPtr null_map,
UInt8ColumnDataPtr join_mask,
Arena & pool,
bool & is_inserted)
{
switch (type)
{
case HashJoin::Type::EMPTY:
[[fallthrough]];
case HashJoin::Type::CROSS:
/// Do nothing. We will only save block, and it is enough
is_inserted = true;
return 0;
#define M(TYPE) \
case HashJoin::Type::TYPE: \
return insertFromBlockImplTypeCase<typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \
break;
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
}
}
bool & is_inserted);
using MapsTemplateVector = std::vector<const MapsTemplate *>;
@ -110,280 +82,36 @@ public:
Block & block,
const Block & block_with_columns_to_add,
const MapsTemplateVector & maps_,
bool is_join_get = false)
{
constexpr JoinFeatures<KIND, STRICTNESS> join_features;
std::vector<JoinOnKeyColumns> join_on_keys;
const auto & onexprs = join.table_join->getClauses();
for (size_t i = 0; i < onexprs.size(); ++i)
{
const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right;
join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]);
}
size_t existing_columns = block.columns();
/** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
* Because if they are constants, then in the "not joined" rows, they may have different values
* - default values, which can differ from the values of these constants.
*/
if constexpr (join_features.right || join_features.full)
{
materializeBlockInplace(block);
}
/** For LEFT/INNER JOIN, the saved blocks do not contain keys.
* For FULL/RIGHT JOIN, the saved blocks contain keys;
* but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
* For ASOF, the last column is used as the ASOF column
*/
AddedColumns<!join_features.is_any_join> added_columns(
block,
block_with_columns_to_add,
join.savedBlockSample(),
join,
std::move(join_on_keys),
join.table_join->getMixedJoinExpression(),
join_features.is_asof_join,
is_join_get);
bool has_required_right_keys = (join.required_right_keys.columns() != 0);
added_columns.need_filter = join_features.need_filter || has_required_right_keys;
added_columns.max_joined_block_rows = join.max_joined_block_rows;
if (!added_columns.max_joined_block_rows)
added_columns.max_joined_block_rows = std::numeric_limits<size_t>::max();
else
added_columns.reserve(join_features.need_replication);
size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags);
/// Do not hold memory for join_on_keys anymore
added_columns.join_on_keys.clear();
Block remaining_block = sliceBlock(block, num_joined);
added_columns.buildOutput();
for (size_t i = 0; i < added_columns.size(); ++i)
block.insert(added_columns.moveColumn(i));
std::vector<size_t> right_keys_to_replicate [[maybe_unused]];
if constexpr (join_features.need_filter)
{
/// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1);
/// Add join key columns from right block if needed using value from left table because of equality
for (size_t i = 0; i < join.required_right_keys.columns(); ++i)
{
const auto & right_key = join.required_right_keys.getByPosition(i);
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back())
continue;
const auto & left_column = block.getByName(join.required_right_keys_sources[i]);
const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name);
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column);
block.insert(std::move(right_col));
}
}
else if (has_required_right_keys)
{
/// Add join key columns from right block if needed.
for (size_t i = 0; i < join.required_right_keys.columns(); ++i)
{
const auto & right_key = join.required_right_keys.getByPosition(i);
auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name);
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back())
continue;
const auto & left_column = block.getByName(join.required_right_keys_sources[i]);
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter);
block.insert(std::move(right_col));
if constexpr (join_features.need_replication)
right_keys_to_replicate.push_back(block.getPositionByName(right_col_name));
}
}
if constexpr (join_features.need_replication)
{
std::unique_ptr<IColumn::Offsets> & offsets_to_replicate = added_columns.offsets_to_replicate;
/// If ALL ... JOIN - we replicate all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
{
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
}
/// Replicate additional right keys
for (size_t pos : right_keys_to_replicate)
{
block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate);
}
}
return remaining_block;
}
bool is_join_get = false);
private:
template <typename KeyGetter, bool is_asof_join>
static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes)
{
if constexpr (is_asof_join)
{
auto key_column_copy = key_columns;
auto key_size_copy = key_sizes;
key_column_copy.pop_back();
key_size_copy.pop_back();
return KeyGetter(key_column_copy, key_size_copy, nullptr);
}
else
return KeyGetter(key_columns, key_sizes, nullptr);
}
static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes);
template <typename KeyGetter, typename HashMap>
static size_t NO_INLINE insertFromBlockImplTypeCase(
static size_t insertFromBlockImplTypeCase(
HashJoin & join, HashMap & map, size_t rows, const ColumnRawPtrs & key_columns,
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
{
[[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename HashMap::mapped_type, RowRef>;
constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
const IColumn * asof_column [[maybe_unused]] = nullptr;
if constexpr (is_asof_join)
asof_column = key_columns.back();
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(key_columns, key_sizes);
/// For ALL and ASOF join always insert values
is_inserted = !mapped_one || is_asof_join;
for (size_t i = 0; i < rows; ++i)
{
if (null_map && (*null_map)[i])
{
/// nulls are not inserted into hash table,
/// keep them for RIGHT and FULL joins
is_inserted = true;
continue;
}
/// Check condition for right table from ON section
if (join_mask && !(*join_mask)[i])
continue;
if constexpr (is_asof_join)
Inserter<HashMap, KeyGetter>::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column);
else if constexpr (mapped_one)
is_inserted |= Inserter<HashMap, KeyGetter>::insertOne(join, map, key_getter, stored_block, i, pool);
else
Inserter<HashMap, KeyGetter>::insertAll(join, map, key_getter, stored_block, i, pool);
}
return map.getBufferSizeInCells();
}
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted);
template <typename AddedColumns>
static size_t switchJoinRightColumns(
const std::vector<const MapsTemplate *> & mapv,
AddedColumns & added_columns,
HashJoin::Type type,
JoinStuff::JoinUsedFlags & used_flags)
{
constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
switch (type)
{
case HashJoin::Type::EMPTY: {
if constexpr (!is_asof_join)
{
using KeyGetter = KeyGetterEmpty<typename MapsTemplate::MappedType>;
std::vector<KeyGetter> key_getter_vector;
key_getter_vector.emplace_back();
using MapTypeVal = typename KeyGetter::MappedType;
std::vector<const MapTypeVal *> a_map_type_vector;
a_map_type_vector.emplace_back();
return joinRightColumnsSwitchNullability<KeyGetter>(
std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags);
}
throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type);
}
#define M(TYPE) \
case HashJoin::Type::TYPE: \
{ \
using MapTypeVal = const typename std::remove_reference_t<decltype(MapsTemplate::TYPE)>::element_type; \
using KeyGetter = typename KeyGetterForType<HashJoin::Type::TYPE, MapTypeVal>::Type; \
std::vector<const MapTypeVal *> a_map_type_vector(mapv.size()); \
std::vector<KeyGetter> key_getter_vector; \
for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \
{ \
const auto & join_on_key = added_columns.join_on_keys[d]; \
a_map_type_vector[d] = mapv[d]->TYPE.get(); \
key_getter_vector.push_back(std::move(createKeyGetter<KeyGetter, is_asof_join>(join_on_key.key_columns, join_on_key.key_sizes))); \
} \
return joinRightColumnsSwitchNullability<KeyGetter>( \
std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \
}
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
default:
throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type);
}
}
JoinStuff::JoinUsedFlags & used_flags);
template <typename KeyGetter, typename Map, typename AddedColumns>
static size_t joinRightColumnsSwitchNullability(
std::vector<KeyGetter> && key_getter_vector,
const std::vector<const Map *> & mapv,
AddedColumns & added_columns,
JoinStuff::JoinUsedFlags & used_flags)
{
if (added_columns.need_filter)
{
return joinRightColumnsSwitchMultipleDisjuncts<KeyGetter, Map, true>(
std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags);
}
else
{
return joinRightColumnsSwitchMultipleDisjuncts<KeyGetter, Map, false>(
std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags);
}
}
JoinStuff::JoinUsedFlags & used_flags);
template <typename KeyGetter, typename Map, bool need_filter, typename AddedColumns>
static size_t joinRightColumnsSwitchMultipleDisjuncts(
std::vector<KeyGetter> && key_getter_vector,
const std::vector<const Map *> & mapv,
AddedColumns & added_columns,
JoinStuff::JoinUsedFlags & used_flags)
{
constexpr JoinFeatures<KIND, STRICTNESS> join_features;
if constexpr (join_features.is_all_join)
{
if (added_columns.additional_filter_expression)
{
bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1;
return joinRightColumnsWithAddtitionalFilter<KeyGetter, Map, join_features.need_replication>(
std::forward<std::vector<KeyGetter>>(key_getter_vector),
mapv,
added_columns,
used_flags,
need_filter,
join_features.need_flags,
join_features.add_missing,
mark_per_row_used);
}
}
if (added_columns.additional_filter_expression)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN");
return mapv.size() > 1 ? joinRightColumns<KeyGetter, Map, need_filter, true>(
std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags)
: joinRightColumns<KeyGetter, Map, need_filter, false>(
std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags);
}
JoinStuff::JoinUsedFlags & used_flags);
/// Joins right table columns which indexes are present in right_indexes using specified map.
/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS).
@ -392,464 +120,30 @@ private:
std::vector<KeyGetter> && key_getter_vector,
const std::vector<const Map *> & mapv,
AddedColumns & added_columns,
JoinStuff::JoinUsedFlags & used_flags)
{
constexpr JoinFeatures<KIND, STRICTNESS> join_features;
size_t rows = added_columns.rows_to_add;
if constexpr (need_filter)
added_columns.filter = IColumn::Filter(rows, 0);
Arena pool;
if constexpr (join_features.need_replication)
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);
IColumn::Offset current_offset = 0;
size_t max_joined_block_rows = added_columns.max_joined_block_rows;
size_t i = 0;
for (; i < rows; ++i)
{
if constexpr (join_features.need_replication)
{
if (unlikely(current_offset >= max_joined_block_rows))
{
added_columns.offsets_to_replicate->resize_assume_reserved(i);
added_columns.filter.resize_assume_reserved(i);
break;
}
}
bool right_row_found = false;
KnownRowsHolder<flag_per_row> known_rows;
for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx)
{
const auto & join_keys = added_columns.join_on_keys[onexpr_idx];
if (join_keys.null_map && (*join_keys.null_map)[i])
continue;
bool row_acceptable = !join_keys.isRowFiltered(i);
using FindResult = typename KeyGetter::FindResult;
auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult();
if (find_result.isFound())
{
right_row_found = true;
auto & mapped = find_result.getMapped();
if constexpr (join_features.is_asof_join)
{
const IColumn & left_asof_key = added_columns.leftAsofKey();
auto row_ref = mapped->findAsof(left_asof_key, i);
if (row_ref.block)
{
setUsed<need_filter>(added_columns.filter, i);
if constexpr (flag_per_row)
used_flags.template setUsed<join_features.need_flags, flag_per_row>(row_ref.block, row_ref.row_num, 0);
else
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing);
}
else
addNotFoundRow<join_features.add_missing, join_features.need_replication>(added_columns, current_offset);
}
else if constexpr (join_features.is_all_join)
{
setUsed<need_filter>(added_columns.filter, i);
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr;
addFoundRowAll<Map, join_features.add_missing>(mapped, added_columns, current_offset, known_rows, used_flags_opt);
}
else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right)
{
/// Use first appeared left key + it needs left columns replication
bool used_once = used_flags.template setUsedOnce<join_features.need_flags, flag_per_row>(find_result);
if (used_once)
{
auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr;
setUsed<need_filter>(added_columns.filter, i);
addFoundRowAll<Map, join_features.add_missing>(
mapped, added_columns, current_offset, known_rows, used_flags_opt);
}
}
else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner)
{
bool used_once = used_flags.template setUsedOnce<join_features.need_flags, flag_per_row>(find_result);
/// Use first appeared left key only
if (used_once)
{
setUsed<need_filter>(added_columns.filter, i);
added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing);
}
break;
}
else if constexpr (join_features.is_any_join && join_features.full)
{
/// TODO
}
else if constexpr (join_features.is_anti_join)
{
if constexpr (join_features.right && join_features.need_flags)
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
}
else /// ANY LEFT, SEMI LEFT, old ANY (RightAny)
{
setUsed<need_filter>(added_columns.filter, i);
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing);
if (join_features.is_any_or_semi_join)
{
break;
}
}
}
}
if (!right_row_found)
{
if constexpr (join_features.is_anti_join && join_features.left)
setUsed<need_filter>(added_columns.filter, i);
addNotFoundRow<join_features.add_missing, join_features.need_replication>(added_columns, current_offset);
}
if constexpr (join_features.need_replication)
{
(*added_columns.offsets_to_replicate)[i] = current_offset;
}
}
added_columns.applyLazyDefaults();
return i;
}
JoinStuff::JoinUsedFlags & used_flags);
template <bool need_filter>
static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]])
{
if constexpr (need_filter)
filter[pos] = 1;
}
static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]);
/// Evaluates the additional filter expression for one batch of pre-selected right rows.
///
/// A temporary block is assembled with one row per entry of `selected_rows`: the required right-side
/// columns are copied row by row, and the required left-side columns repeat each left row once per
/// right row selected for it. The expression is executed on that block and the column at position 0
/// of the executed block becomes the filter.
///
/// Arguments:
///  - left_start_row: index in `added_columns.left_block` of the first left row of the batch;
///  - selected_rows: right-side row refs collected for the batch;
///  - row_replicate_offset: running totals of selected rows; element i minus element i-1 is the number
///    of rows selected for left row `left_start_row + i - 1`;
///  - added_columns: provides `left_block` and `additional_filter_expression`.
///
/// Returns the filter column. A Nullable result is unwrapped and its NULL positions are set to 0.
template <typename AddedColumns>
static ColumnPtr buildAdditionalFilter(
size_t left_start_row,
const std::vector<RowRef> & selected_rows,
const std::vector<size_t> & row_replicate_offset,
AddedColumns & added_columns)
{
ColumnPtr result_column;
/// do { ... } while (false) is used only so that `break` can leave early with `result_column` set.
do
{
/// Nothing was selected: the filter is empty as well.
if (selected_rows.empty())
{
result_column = ColumnUInt8::create();
break;
}
const Block & sample_right_block = *selected_rows.begin()->block;
/// No usable right block (presumably an empty one) or no expression: accept every selected row.
if (!sample_right_block || !added_columns.additional_filter_expression)
{
auto filter = ColumnUInt8::create();
filter->insertMany(1, selected_rows.size());
result_column = std::move(filter);
break;
}
auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes();
/// The expression needs no input columns: execute it on an empty block and resize the result
/// to one value per selected row.
if (required_cols.empty())
{
Block block;
added_columns.additional_filter_expression->execute(block);
result_column = block.getByPosition(0).column->cloneResized(selected_rows.size());
break;
}
NameSet required_column_names;
for (auto & col : required_cols)
required_column_names.insert(col.name);
Block executed_block;
/// Right-side inputs. `right_col_pos` is the position in the sample block and is reused to read
/// from every selected row's own block (assumes all right blocks share one layout - TODO confirm).
size_t right_col_pos = 0;
for (const auto & col : sample_right_block.getColumnsWithTypeAndName())
{
if (required_column_names.contains(col.name))
{
auto new_col = col.column->cloneEmpty();
for (const auto & selected_row : selected_rows)
{
const auto & src_col = selected_row.block->getByPosition(right_col_pos);
new_col->insertFrom(*src_col.column, selected_row.row_num);
}
executed_block.insert({std::move(new_col), col.type, col.name});
}
right_col_pos += 1;
}
/// No required column was found on the right side: leave with an empty filter.
/// NOTE(review): the filter is empty here although `selected_rows` is not - confirm callers never
/// reach this path with rows to index.
if (!executed_block)
{
result_column = ColumnUInt8::create();
break;
}
/// Left-side inputs: required names that are absent from the left block are skipped.
for (const auto & col_name : required_column_names)
{
const auto * src_col = added_columns.left_block.findByName(col_name);
if (!src_col)
continue;
auto new_col = src_col->column->cloneEmpty();
size_t prev_left_offset = 0;
/// Element 0 is the base of the running totals, so left rows start at index 1.
for (size_t i = 1; i < row_replicate_offset.size(); ++i)
{
const size_t & left_offset = row_replicate_offset[i];
size_t rows = left_offset - prev_left_offset;
if (rows)
new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows);
prev_left_offset = left_offset;
}
executed_block.insert({std::move(new_col), src_col->type, col_name});
}
/// NOTE(review): this check appears unreachable, because an empty block already left the loop above.
if (!executed_block)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"required columns: [{}], but not found any in left/right table. right table: {}, left table: {}",
required_cols.toString(),
sample_right_block.dumpNames(),
added_columns.left_block.dumpNames());
}
/// Every input must carry both a column and a type before the expression is executed.
for (const auto & col : executed_block.getColumnsWithTypeAndName())
if (!col.column || !col.type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure());
added_columns.additional_filter_expression->execute(executed_block);
/// The result is read from position 0 of the executed block.
result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst();
executed_block.clear();
} while (false);
result_column = result_column->convertToFullIfNeeded();
if (result_column->isNullable())
{
/// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros
/// Trying to avoid copying data, since we are the only owner of the column.
ColumnPtr mask_column = assert_cast<const ColumnNullable &>(*result_column).getNullMapColumnPtr();
MutableColumnPtr mutable_column;
{
ColumnPtr nested_column = assert_cast<const ColumnNullable &>(*result_column).getNestedColumnPtr();
/// Drop the reference held through the Nullable wrapper before mutating the nested column.
result_column.reset();
mutable_column = IColumn::mutate(std::move(nested_column));
}
auto & column_data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
const auto & mask_column_data = assert_cast<const ColumnUInt8 &>(*mask_column).getData();
for (size_t i = 0; i < column_data.size(); ++i)
{
if (mask_column_data[i])
column_data[i] = 0;
}
return mutable_column;
}
return result_column;
}
AddedColumns & added_columns);
/// Joins right rows for a JOIN that carries an additional filter expression.
/// Works in batches: first collect all row refs matched by the join keys, then evaluate the additional
/// filter on them (buildAdditionalFilter) and copy only the rows whose filter value is non-zero.
/// Returns `left_row_iter`, i.e. the number of left rows that were processed.
template <typename KeyGetter, typename Map, bool need_replication, typename AddedColumns>
template <typename KeyGetter, typename Map, typename AddedColumns>
static size_t joinRightColumnsWithAddtitionalFilter(
std::vector<KeyGetter> && key_getter_vector,
const std::vector<const Map *> & mapv,
AddedColumns & added_columns,
JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]],
bool need_filter [[maybe_unused]],
bool need_flags [[maybe_unused]],
bool add_missing [[maybe_unused]],
bool flag_per_row [[maybe_unused]])
{
size_t left_block_rows = added_columns.rows_to_add;
/// One zero-initialised filter entry per left row; entries are set to 1 for rows with a surviving match.
if (need_filter)
added_columns.filter = IColumn::Filter(left_block_rows, 0);
std::unique_ptr<Arena> pool;
if constexpr (need_replication)
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(left_block_rows);
/// Per-batch running totals of selected right rows; element 0 is always 0 (see the collect lambda).
std::vector<size_t> row_replicate_offset;
row_replicate_offset.reserve(left_block_rows);
using FindResult = typename KeyGetter::FindResult;
size_t max_joined_block_rows = added_columns.max_joined_block_rows;
/// Next left row to examine; shared by both lambdas and the driving loop below.
size_t left_row_iter = 0;
PreSelectedRows selected_rows;
selected_rows.reserve(left_block_rows);
std::vector<FindResult> find_results;
find_results.reserve(left_block_rows);
bool exceeded_max_block_rows = false;
/// total_added_rows counts rows emitted so far over all batches,
/// current_added_rows counts rows selected by keys in the current batch.
IColumn::Offset total_added_rows = 0;
IColumn::Offset current_added_rows = 0;
/// Step 1: reset the per-batch state, then scan left rows starting at `left_row_iter` and collect
/// every right row matched by the join keys into `selected_rows`.
auto collect_keys_matched_rows_refs = [&]()
{
pool = std::make_unique<Arena>();
find_results.clear();
row_replicate_offset.clear();
row_replicate_offset.push_back(0);
current_added_rows = 0;
selected_rows.clear();
for (; left_row_iter < left_block_rows; ++left_row_iter)
{
if constexpr (need_replication)
{
/// Stop the batch once the emitted plus pending rows reach the block size limit.
if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows))
{
break;
}
}
KnownRowsHolder<true> all_flag_known_rows;
KnownRowsHolder<false> single_flag_know_rows;
for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx)
{
const auto & join_keys = added_columns.join_on_keys[join_clause_idx];
/// A NULL key in this clause cannot match; try the next clause.
if (join_keys.null_map && (*join_keys.null_map)[left_row_iter])
continue;
bool row_acceptable = !join_keys.isRowFiltered(left_row_iter);
auto find_result = row_acceptable
? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool)
: FindResult();
if (find_result.isFound())
{
auto & mapped = find_result.getMapped();
find_results.push_back(find_result);
/// The runtime flag picks the matching compile-time variant of addFoundRowAll.
if (flag_per_row)
addFoundRowAll<Map, false, true>(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr);
else
addFoundRowAll<Map, false, false>(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr);
}
}
/// One entry per scanned left row: the running total after this row.
row_replicate_offset.push_back(current_added_rows);
}
};
/// Step 2: walk the batch again and, for every left row, append the selected right rows whose
/// filter value is non-zero; rows without any surviving match go through addNotFoundRow.
auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col)
{
const PaddedPODArray<UInt8> & filter_flags = assert_cast<const ColumnUInt8 &>(*filter_col).getData();
size_t prev_replicated_row = 0;
auto selected_right_row_it = selected_rows.begin();
size_t find_result_index = 0;
/// Index i refers to left row `left_start_row + i - 1`; element 0 is the base offset.
for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i)
{
bool any_matched = false;
/// For all right join, flag_per_row is true, we need mark used flags for each row.
if (flag_per_row)
{
for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row)
{
if (filter_flags[replicated_row])
{
any_matched = true;
added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing);
total_added_rows += 1;
if (need_flags)
used_flags.template setUsed<true, true>(selected_right_row_it->block, selected_right_row_it->row_num, 0);
}
++selected_right_row_it;
}
}
else
{
for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row)
{
if (filter_flags[replicated_row])
{
any_matched = true;
added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing);
total_added_rows += 1;
}
++selected_right_row_it;
}
}
if (!any_matched)
{
if (add_missing)
addNotFoundRow<true, need_replication>(added_columns, total_added_rows);
else
addNotFoundRow<false, need_replication>(added_columns, total_added_rows);
}
else
{
/// Without per-row flags the whole find result is marked as used.
if (!flag_per_row && need_flags)
used_flags.template setUsed<true, false>(find_results[find_result_index]);
if (need_filter)
setUsed<true>(added_columns.filter, left_start_row + i - 1);
if (add_missing)
added_columns.applyLazyDefaults();
}
/// Advances by one only when this left row had key matches.
/// NOTE(review): this assumes at most one find result per left row on the path that reads find_results - confirm.
find_result_index += (prev_replicated_row != row_replicate_offset[i]);
if constexpr (need_replication)
{
(*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows;
}
prev_replicated_row = row_replicate_offset[i];
}
};
/// Drive the two steps batch by batch until all left rows are consumed or the size limit stops it.
while (left_row_iter < left_block_rows && !exceeded_max_block_rows)
{
auto left_start_row = left_row_iter;
collect_keys_matched_rows_refs();
/// Consistency check: one selected row per counted row, and one offset per scanned left row plus the base entry.
if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, "
"left_start_row: {}",
selected_rows.size(),
current_added_rows,
row_replicate_offset.size(),
left_row_iter,
left_start_row);
}
auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns);
copy_final_matched_rows(left_start_row, filter_col);
if constexpr (need_replication)
{
// Add a check for current_added_rows to avoid run the filter expression on too small size batch.
if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024)
exceeded_max_block_rows = true;
}
}
if constexpr (need_replication)
{
/// Shrink both arrays to the number of left rows that were actually processed.
added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter);
added_columns.filter.resize_assume_reserved(left_row_iter);
}
added_columns.applyLazyDefaults();
return left_row_iter;
}
bool flag_per_row [[maybe_unused]]);
/// Splits `block` at row `num_rows`: `block` keeps its first `num_rows` rows (modified in place)
/// and the rows after them are returned as a new block with the same structure.
/// When `num_rows` covers the whole block, `block` is left untouched and an empty Block is returned.
static Block sliceBlock(Block & block, size_t num_rows)
{
    const size_t rows_in_block = block.rows();
    if (rows_in_block <= num_rows)
        return {};

    const size_t tail_rows = rows_in_block - num_rows;
    Block tail = block.cloneEmpty();

    const size_t column_count = block.columns();
    for (size_t pos = 0; pos != column_count; ++pos)
    {
        auto & head_entry = block.getByPosition(pos);
        /// Take the tail first: the next statement replaces the source column with its head.
        tail.getByPosition(pos).column = head_entry.column->cut(num_rows, tail_rows);
        head_entry.column = head_entry.column->cut(0, num_rows);
    }
    return tail;
}
static Block sliceBlock(Block & block, size_t num_rows);
/** Since we do not store right key columns,
* this function is used to copy left key columns to right key columns.
@ -864,70 +158,22 @@ private:
const DataTypePtr & right_key_type,
const String & renamed_right_column,
const ColumnWithTypeAndName & left_column,
const IColumn::Filter * null_map_filter = nullptr)
{
ColumnWithTypeAndName right_column = left_column;
right_column.name = renamed_right_column;
const IColumn::Filter * null_map_filter = nullptr);
if (null_map_filter)
right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter);
static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable);
bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type);
if (null_map_filter)
correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter);
else
correctNullabilityInplace(right_column, should_be_nullable);
if (!right_column.type->equals(*right_key_type))
{
right_column.column = castColumnAccurate(right_column, right_key_type);
right_column.type = right_key_type;
}
right_column.column = right_column.column->convertToFullColumnIfConst();
return right_column;
}
/// Makes `column` Nullable when `nullable` is set; otherwise strips nullability from it,
/// first replacing the values that sit under NULLs.
static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable)
{
    if (nullable)
    {
        JoinCommon::convertColumnToNullable(column);
        return;
    }

    /// We have to replace values masked by NULLs with defaults.
    const ColumnNullable * as_nullable = nullptr;
    if (column.column)
        as_nullable = checkAndGetColumn<ColumnNullable>(&*column.column);

    if (as_nullable)
        column.column = JoinCommon::filterWithBlanks(column.column, as_nullable->getNullMapColumn().getData(), true);

    JoinCommon::removeColumnNullability(column);
}
/// Same as the two-argument overload, but when the column ends up Nullable and `negative_null_map`
/// is not empty, the negated map is applied to the column's null map.
static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map)
{
    if (!nullable)
    {
        JoinCommon::removeColumnNullability(column);
        return;
    }

    JoinCommon::convertColumnToNullable(column);

    const bool must_apply_map = column.type->isNullable() && !negative_null_map.empty();
    if (!must_apply_map)
        return;

    /// Take ownership of the column so its null map can be modified in place.
    MutableColumnPtr owned_column = IColumn::mutate(std::move(column.column));
    assert_cast<ColumnNullable &>(*owned_column).applyNegatedNullMap(negative_null_map);
    column.column = std::move(owned_column);
}
static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map);
};
/// Instantiate template class ahead in different .cpp files to avoid `too large translation unit`.
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::RightAny, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Any, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Any, HashJoin::MapsAll>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::All, HashJoin::MapsAll>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Semi, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Semi, HashJoin::MapsAll>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Anti, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Anti, HashJoin::MapsAll>;
extern template class HashJoinMethods<JoinKind::Left, JoinStrictness::Asof, HashJoin::MapsAsof>;
extern template class HashJoinMethods<JoinKind::Right, JoinStrictness::RightAny, HashJoin::MapsOne>;
@ -939,6 +185,7 @@ extern template class HashJoinMethods<JoinKind::Right, JoinStrictness::Asof, Has
extern template class HashJoinMethods<JoinKind::Inner, JoinStrictness::RightAny, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Any, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Any, HashJoin::MapsAll>;
extern template class HashJoinMethods<JoinKind::Inner, JoinStrictness::All, HashJoin::MapsAll>;
extern template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Semi, HashJoin::MapsOne>;
extern template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Anti, HashJoin::MapsOne>;

View File

@ -0,0 +1,936 @@
#pragma once
#include <Interpreters/HashJoin/HashJoinMethods.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNSUPPORTED_JOIN_KEYS;
extern const int LOGICAL_ERROR;
}
/// Dispatches on the hash table variant `type` and inserts the rows of one right-side block into
/// the matching map of `maps`. Returns what insertFromBlockImplTypeCase returns for that variant,
/// or 0 for the variants that keep no hash table.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::insertFromBlockImpl(
    HashJoin & join,
    HashJoin::Type type,
    MapsTemplate & maps,
    size_t rows,
    const ColumnRawPtrs & key_columns,
    const Sizes & key_sizes,
    Block * stored_block,
    ConstNullMapPtr null_map,
    UInt8ColumnDataPtr join_mask,
    Arena & pool,
    bool & is_inserted)
{
    switch (type)
    {
        /// Do nothing. We will only save block, and it is enough
        case HashJoin::Type::EMPTY:
        case HashJoin::Type::CROSS:
            is_inserted = true;
            return 0;

#define M(TYPE) \
        case HashJoin::Type::TYPE: \
        { \
            using KeyGetterOfVariant = \
                typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type; \
            return insertFromBlockImplTypeCase<KeyGetterOfVariant>( \
                join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \
        }
        APPLY_FOR_JOIN_VARIANTS(M)
#undef M
    }
}
/// Joins the left rows in `block` against the stored right-side data.
/// On return `block` holds the result for the first `num_joined` left rows: the left columns
/// (filtered and/or replicated as the join features require), the added right columns and any
/// required right key columns. The left rows that were not processed are cut off and returned.
///
/// Arguments:
///  - join: the HashJoin that supplies clauses, key sizes, saved block sample, limits and used flags;
///  - block: left block, modified in place;
///  - block_with_columns_to_add: passed through to AddedColumns;
///  - maps_: hash maps handed to switchJoinRightColumns;
///  - is_join_get: when set, the right key names of each clause are used to look up keys in `block`.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
Block HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinBlockImpl(
const HashJoin & join, Block & block, const Block & block_with_columns_to_add, const MapsTemplateVector & maps_, bool is_join_get)
{
constexpr JoinFeatures<KIND, STRICTNESS, MapsTemplate> join_features;
/// One JoinOnKeyColumns entry per ON clause.
std::vector<JoinOnKeyColumns> join_on_keys;
const auto & onexprs = join.table_join->getClauses();
for (size_t i = 0; i < onexprs.size(); ++i)
{
const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right;
join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]);
}
/// Columns at positions [0, existing_columns) are the original left columns.
size_t existing_columns = block.columns();
/** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
* Because if they are constants, then in the "not joined" rows, they may have different values
* - default values, which can differ from the values of these constants.
*/
if constexpr (join_features.right || join_features.full)
{
materializeBlockInplace(block);
}
/** For LEFT/INNER JOIN, the saved blocks do not contain keys.
* For FULL/RIGHT JOIN, the saved blocks contain keys;
* but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
* For ASOF, the last column is used as the ASOF column
*/
AddedColumns<!join_features.is_any_join> added_columns(
block,
block_with_columns_to_add,
join.savedBlockSample(),
join,
std::move(join_on_keys),
join.table_join->getMixedJoinExpression(),
join_features.is_asof_join,
is_join_get);
bool has_required_right_keys = (join.required_right_keys.columns() != 0);
/// A filter is also needed when right key columns have to be produced from the left ones.
added_columns.need_filter = join_features.need_filter || has_required_right_keys;
added_columns.max_joined_block_rows = join.max_joined_block_rows;
/// A limit of 0 is replaced by the maximum size_t value; reserve() is called only for a non-zero limit.
if (!added_columns.max_joined_block_rows)
added_columns.max_joined_block_rows = std::numeric_limits<size_t>::max();
else
added_columns.reserve(join_features.need_replication);
/// Number of left rows that were actually processed by the join.
size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags);
/// Do not hold memory for join_on_keys anymore
added_columns.join_on_keys.clear();
/// Keep only the processed left rows in `block`; the rest is handed back to the caller.
Block remaining_block = sliceBlock(block, num_joined);
added_columns.buildOutput();
for (size_t i = 0; i < added_columns.size(); ++i)
block.insert(added_columns.moveColumn(i));
/// Positions of right key columns that must be replicated together with the left columns.
std::vector<size_t> right_keys_to_replicate [[maybe_unused]];
if constexpr (join_features.need_filter)
{
/// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1);
/// Add join key columns from right block if needed using value from left table because of equality
for (size_t i = 0; i < join.required_right_keys.columns(); ++i)
{
const auto & right_key = join.required_right_keys.getByPosition(i);
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back())
continue;
const auto & left_column = block.getByName(join.required_right_keys_sources[i]);
const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name);
/// The left columns are already filtered, so no filter is passed here.
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column);
block.insert(std::move(right_col));
}
}
else if (has_required_right_keys)
{
/// Add join key columns from right block if needed.
for (size_t i = 0; i < join.required_right_keys.columns(); ++i)
{
const auto & right_key = join.required_right_keys.getByPosition(i);
auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name);
/// asof column is already in block.
if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back())
continue;
const auto & left_column = block.getByName(join.required_right_keys_sources[i]);
/// The left columns were not filtered in this branch, so the filter is passed along with the copy.
auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter);
block.insert(std::move(right_col));
if constexpr (join_features.need_replication)
right_keys_to_replicate.push_back(block.getPositionByName(right_col_name));
}
}
if constexpr (join_features.need_replication)
{
std::unique_ptr<IColumn::Offsets> & offsets_to_replicate = added_columns.offsets_to_replicate;
/// If ALL ... JOIN - we replicate all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
{
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
}
/// Replicate additional right keys
for (size_t pos : right_keys_to_replicate)
{
block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate);
}
}
return remaining_block;
}
/// Builds a KeyGetter over the given key columns and sizes.
/// For ASOF joins the trailing key column and its size are dropped before the getter is built.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename KeyGetter, bool is_asof_join>
KeyGetter HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes)
{
    if constexpr (!is_asof_join)
    {
        return KeyGetter(key_columns, key_sizes, nullptr);
    }
    else
    {
        /// Work on copies: the caller's containers must keep their last element.
        auto columns_without_last = key_columns;
        auto sizes_without_last = key_sizes;
        columns_without_last.pop_back();
        sizes_without_last.pop_back();
        return KeyGetter(columns_without_last, sizes_without_last, nullptr);
    }
}
/// Inserts the rows of one right-side block into `map`, one row at a time.
/// Rows with a NULL key or rejected by `join_mask` are not inserted into the map.
/// `is_inserted` is set when the block has to be kept; the function returns map.getBufferSizeInCells().
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename KeyGetter, typename HashMap>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::insertFromBlockImplTypeCase(
    HashJoin & join,
    HashMap & map,
    size_t rows,
    const ColumnRawPtrs & key_columns,
    const Sizes & key_sizes,
    Block * stored_block,
    ConstNullMapPtr null_map,
    UInt8ColumnDataPtr join_mask,
    Arena & pool,
    bool & is_inserted)
{
    using RowInserter = Inserter<HashMap, KeyGetter>;

    constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
    [[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename HashMap::mapped_type, RowRef>;

    /// For ASOF the last key column is handed to insertAsof separately.
    const IColumn * asof_column [[maybe_unused]] = nullptr;
    if constexpr (is_asof_join)
        asof_column = key_columns.back();

    auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(key_columns, key_sizes);

    /// For ALL and ASOF join always insert values
    is_inserted = is_asof_join || !mapped_one;

    for (size_t row = 0; row < rows; ++row)
    {
        const bool key_is_null = null_map && (*null_map)[row];
        if (key_is_null)
        {
            /// nulls are not inserted into hash table,
            /// keep them for RIGHT and FULL joins
            is_inserted = true;
            continue;
        }

        /// Check condition for right table from ON section
        const bool rejected_by_mask = join_mask && !(*join_mask)[row];
        if (rejected_by_mask)
            continue;

        if constexpr (is_asof_join)
            RowInserter::insertAsof(join, map, key_getter, stored_block, row, pool, *asof_column);
        else if constexpr (mapped_one)
            is_inserted |= RowInserter::insertOne(join, map, key_getter, stored_block, row, pool);
        else
            RowInserter::insertAll(join, map, key_getter, stored_block, row, pool);
    }

    return map.getBufferSizeInCells();
}
/// Dispatches on the hash table variant `type`: for every ON clause in `added_columns.join_on_keys`
/// it picks the matching map from `mapv` and builds a key getter, then forwards everything to
/// joinRightColumnsSwitchNullability. Returns the value produced by that call.
/// Throws UNSUPPORTED_JOIN_KEYS for variants that cannot be joined here and LOGICAL_ERROR when
/// fewer maps than ON clauses were supplied.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename AddedColumns>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::switchJoinRightColumns(
    const std::vector<const MapsTemplate *> & mapv,
    AddedColumns & added_columns,
    HashJoin::Type type,
    JoinStuff::JoinUsedFlags & used_flags)
{
    constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
    switch (type)
    {
        case HashJoin::Type::EMPTY: {
            if constexpr (!is_asof_join)
            {
                /// A single default-constructed getter and a single null map pointer stand in for the missing keys.
                using KeyGetter = KeyGetterEmpty<typename MapsTemplate::MappedType>;
                std::vector<KeyGetter> key_getter_vector;
                key_getter_vector.emplace_back();

                using MapTypeVal = typename KeyGetter::MappedType;
                std::vector<const MapTypeVal *> a_map_type_vector;
                a_map_type_vector.emplace_back();
                return joinRightColumnsSwitchNullability<KeyGetter>(
                    std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags);
            }
            throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type);
        }
/// The loop below indexes both `mapv` and `a_map_type_vector` by ON clause, so the number of maps
/// is checked first instead of reading past the end of either vector.
#define M(TYPE) \
    case HashJoin::Type::TYPE: { \
        using MapTypeVal = const typename std::remove_reference_t<decltype(MapsTemplate::TYPE)>::element_type; \
        using KeyGetter = typename KeyGetterForType<HashJoin::Type::TYPE, MapTypeVal>::Type; \
        const size_t clauses = added_columns.join_on_keys.size(); \
        if (mapv.size() < clauses) \
            throw Exception( \
                ErrorCodes::LOGICAL_ERROR, "Got {} hash maps for {} JOIN ON clauses", mapv.size(), clauses); \
        std::vector<const MapTypeVal *> a_map_type_vector(mapv.size()); \
        std::vector<KeyGetter> key_getter_vector; \
        key_getter_vector.reserve(clauses); \
        for (size_t d = 0; d < clauses; ++d) \
        { \
            const auto & join_on_key = added_columns.join_on_keys[d]; \
            a_map_type_vector[d] = mapv[d]->TYPE.get(); \
            key_getter_vector.push_back( \
                createKeyGetter<KeyGetter, is_asof_join>(join_on_key.key_columns, join_on_key.key_sizes)); \
        } \
        return joinRightColumnsSwitchNullability<KeyGetter>(std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \
    }
        APPLY_FOR_JOIN_VARIANTS(M)
#undef M
        default:
            throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type);
    }
}
/// Turns the runtime flag `added_columns.need_filter` into the compile-time `need_filter` argument
/// of joinRightColumnsSwitchMultipleDisjuncts and returns that call's result.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename KeyGetter, typename Map, typename AddedColumns>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinRightColumnsSwitchNullability(
    std::vector<KeyGetter> && key_getter_vector,
    const std::vector<const Map *> & mapv,
    AddedColumns & added_columns,
    JoinStuff::JoinUsedFlags & used_flags)
{
    /// Only one branch of the conditional is evaluated, so the getters are moved from exactly once.
    return added_columns.need_filter
        ? joinRightColumnsSwitchMultipleDisjuncts<KeyGetter, Map, true>(
              std::move(key_getter_vector), mapv, added_columns, used_flags)
        : joinRightColumnsSwitchMultipleDisjuncts<KeyGetter, Map, false>(
              std::move(key_getter_vector), mapv, added_columns, used_flags);
}
/// Chooses the join routine:
///  - with an additional filter expression, joinRightColumnsWithAddtitionalFilter is used when the
///    maps are of the "all" kind, otherwise LOGICAL_ERROR is thrown;
///  - without one, joinRightColumns is used, with per-row flags when there is more than one map.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename KeyGetter, typename Map, bool need_filter, typename AddedColumns>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinRightColumnsSwitchMultipleDisjuncts(
    std::vector<KeyGetter> && key_getter_vector,
    const std::vector<const Map *> & mapv,
    AddedColumns & added_columns,
    JoinStuff::JoinUsedFlags & used_flags)
{
    constexpr JoinFeatures<KIND, STRICTNESS, MapsTemplate> join_features;
    const bool several_disjuncts = mapv.size() > 1;

    if (added_columns.additional_filter_expression)
    {
        if constexpr (join_features.is_maps_all)
        {
            const bool mark_per_row_used = join_features.right || join_features.full || several_disjuncts;
            return joinRightColumnsWithAddtitionalFilter<KeyGetter, Map>(
                std::move(key_getter_vector), mapv, added_columns, used_flags, need_filter, mark_per_row_used);
        }
        else
        {
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN");
        }
    }

    if (several_disjuncts)
        return joinRightColumns<KeyGetter, Map, need_filter, true>(std::move(key_getter_vector), mapv, added_columns, used_flags);

    return joinRightColumns<KeyGetter, Map, need_filter, false>(std::move(key_getter_vector), mapv, added_columns, used_flags);
}
/// Joins right table columns for the left rows of `added_columns` using the given maps (one per ON clause).
/// Makes filter (1 if row presented in right table) and fills offsets to replicate (for ALL JOINS).
/// Returns the number of left rows that were processed; this can be less than `rows_to_add`
/// when replication is needed and the joined row limit is reached.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename KeyGetter, typename Map, bool need_filter, bool flag_per_row, typename AddedColumns>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinRightColumns(
std::vector<KeyGetter> && key_getter_vector,
const std::vector<const Map *> & mapv,
AddedColumns & added_columns,
JoinStuff::JoinUsedFlags & used_flags)
{
constexpr JoinFeatures<KIND, STRICTNESS, MapsTemplate> join_features;
size_t rows = added_columns.rows_to_add;
/// One zero-initialised filter entry per left row.
if constexpr (need_filter)
added_columns.filter = IColumn::Filter(rows, 0);
Arena pool;
if constexpr (join_features.need_replication)
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);
/// Running count of joined rows; it is written into offsets_to_replicate after every left row.
IColumn::Offset current_offset = 0;
size_t max_joined_block_rows = added_columns.max_joined_block_rows;
size_t i = 0;
for (; i < rows; ++i)
{
if constexpr (join_features.need_replication)
{
/// Limit reached: shrink both arrays to the rows handled so far and stop.
/// NOTE(review): the filter is resized here even when need_filter is false and it was never allocated - confirm this is intended.
if (unlikely(current_offset >= max_joined_block_rows))
{
added_columns.offsets_to_replicate->resize_assume_reserved(i);
added_columns.filter.resize_assume_reserved(i);
break;
}
}
bool right_row_found = false;
KnownRowsHolder<flag_per_row> known_rows;
/// Try every ON clause for this left row.
for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx)
{
const auto & join_keys = added_columns.join_on_keys[onexpr_idx];
/// A NULL key in this clause cannot match; try the next clause.
if (join_keys.null_map && (*join_keys.null_map)[i])
continue;
bool row_acceptable = !join_keys.isRowFiltered(i);
using FindResult = typename KeyGetter::FindResult;
auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult();
if (find_result.isFound())
{
right_row_found = true;
auto & mapped = find_result.getMapped();
if constexpr (join_features.is_asof_join)
{
/// ASOF: the key matched, now look up the row by the ASOF column value.
const IColumn & left_asof_key = added_columns.leftAsofKey();
auto row_ref = mapped->findAsof(left_asof_key, i);
if (row_ref.block)
{
setUsed<need_filter>(added_columns.filter, i);
if constexpr (flag_per_row)
used_flags.template setUsed<join_features.need_flags, flag_per_row>(row_ref.block, row_ref.row_num, 0);
else
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing);
}
else
addNotFoundRow<join_features.add_missing, join_features.need_replication>(added_columns, current_offset);
}
else if constexpr (join_features.is_all_join)
{
setUsed<need_filter>(added_columns.filter, i);
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr;
addFoundRowAll<Map, join_features.add_missing>(mapped, added_columns, current_offset, known_rows, used_flags_opt);
}
else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right)
{
/// Use first appeared left key + it needs left columns replication
bool used_once = used_flags.template setUsedOnce<join_features.need_flags, flag_per_row>(find_result);
if (used_once)
{
auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr;
setUsed<need_filter>(added_columns.filter, i);
addFoundRowAll<Map, join_features.add_missing>(mapped, added_columns, current_offset, known_rows, used_flags_opt);
}
}
else if constexpr (join_features.is_any_join && join_features.inner)
{
bool used_once = used_flags.template setUsedOnce<join_features.need_flags, flag_per_row>(find_result);
/// Use first appeared left key only
if (used_once)
{
setUsed<need_filter>(added_columns.filter, i);
added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing);
}
/// The remaining ON clauses are not tried for this left row.
break;
}
else if constexpr (join_features.is_any_join && join_features.full)
{
/// TODO
}
else if constexpr (join_features.is_anti_join)
{
/// ANTI: a found row is never appended; only the used flag is recorded for the RIGHT kind.
if constexpr (join_features.right && join_features.need_flags)
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
}
else /// ANY LEFT, SEMI LEFT, old ANY (RightAny)
{
setUsed<need_filter>(added_columns.filter, i);
used_flags.template setUsed<join_features.need_flags, flag_per_row>(find_result);
added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing);
/// One match is enough for ANY/SEMI: skip the remaining ON clauses.
if (join_features.is_any_or_semi_join)
{
break;
}
}
}
}
if (!right_row_found)
{
/// LEFT ANTI keeps exactly the left rows that found nothing on the right.
if constexpr (join_features.is_anti_join && join_features.left)
setUsed<need_filter>(added_columns.filter, i);
addNotFoundRow<join_features.add_missing, join_features.need_replication>(added_columns, current_offset);
}
if constexpr (join_features.need_replication)
{
(*added_columns.offsets_to_replicate)[i] = current_offset;
}
}
added_columns.applyLazyDefaults();
return i;
}
/// Writes 1 into `filter[pos]` when `need_filter` is true; otherwise the call does nothing.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <bool need_filter>
void HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]])
{
    if constexpr (need_filter)
    {
        filter[pos] = 1;
    }
}
/// Evaluates the additional filter expression for one batch of pre-selected right rows.
///
/// A temporary block is assembled with one row per entry of `selected_rows`: the required right-side
/// columns are copied row by row, and the required left-side columns repeat each left row once per
/// right row selected for it. The expression is executed on that block and the column at position 0
/// of the executed block becomes the filter.
///
/// Arguments:
///  - left_start_row: index in `added_columns.left_block` of the first left row of the batch;
///  - selected_rows: right-side row refs collected for the batch;
///  - row_replicate_offset: running totals of selected rows; element i minus element i-1 is the number
///    of rows selected for left row `left_start_row + i - 1`;
///  - added_columns: provides `left_block` and `additional_filter_expression`.
///
/// Returns the filter column. A Nullable result is unwrapped and its NULL positions are set to 0.
/// Throws LOGICAL_ERROR when an input column of the expression has no column or no type.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename AddedColumns>
ColumnPtr HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::buildAdditionalFilter(
    size_t left_start_row,
    const std::vector<RowRef> & selected_rows,
    const std::vector<size_t> & row_replicate_offset,
    AddedColumns & added_columns)
{
    ColumnPtr result_column;
    /// do { ... } while (false) is used only so that `break` can leave early with `result_column` set.
    do
    {
        /// Nothing was selected: the filter is empty as well.
        if (selected_rows.empty())
        {
            result_column = ColumnUInt8::create();
            break;
        }

        const size_t selected_count = selected_rows.size();
        const Block & sample_right_block = *selected_rows.begin()->block;

        /// No usable right block (presumably an empty one) or no expression: accept every selected row.
        if (!sample_right_block || !added_columns.additional_filter_expression)
        {
            auto filter = ColumnUInt8::create();
            filter->insertMany(1, selected_count);
            result_column = std::move(filter);
            break;
        }

        auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes();
        /// The expression needs no input columns: execute it on an empty block and resize the result
        /// to one value per selected row.
        if (required_cols.empty())
        {
            Block block;
            added_columns.additional_filter_expression->execute(block);
            result_column = block.getByPosition(0).column->cloneResized(selected_count);
            break;
        }

        NameSet required_column_names;
        for (const auto & col : required_cols)
            required_column_names.insert(col.name);

        Block executed_block;

        /// Right-side inputs. `right_col_pos` is the position in the sample block and is reused to read
        /// from every selected row's own block (assumes all right blocks share one layout - TODO confirm).
        size_t right_col_pos = 0;
        for (const auto & col : sample_right_block.getColumnsWithTypeAndName())
        {
            if (required_column_names.contains(col.name))
            {
                auto new_col = col.column->cloneEmpty();
                /// Exactly one value is appended per selected row, so the final size is known up front.
                new_col->reserve(selected_count);
                for (const auto & selected_row : selected_rows)
                {
                    const auto & src_col = selected_row.block->getByPosition(right_col_pos);
                    new_col->insertFrom(*src_col.column, selected_row.row_num);
                }
                executed_block.insert({std::move(new_col), col.type, col.name});
            }
            right_col_pos += 1;
        }

        /// No required column was found on the right side: leave with an empty filter.
        /// NOTE(review): the filter is empty here although `selected_rows` is not - confirm callers never
        /// reach this path with rows to index.
        if (!executed_block)
        {
            result_column = ColumnUInt8::create();
            break;
        }

        /// Left-side inputs: required names that are absent from the left block are skipped.
        for (const auto & col_name : required_column_names)
        {
            const auto * src_col = added_columns.left_block.findByName(col_name);
            if (!src_col)
                continue;

            auto new_col = src_col->column->cloneEmpty();
            /// Only a capacity hint: the caller is expected to pass offsets whose last entry equals
            /// the number of selected rows.
            new_col->reserve(selected_count);

            size_t prev_left_offset = 0;
            /// Element 0 is the base of the running totals, so left rows start at index 1.
            for (size_t i = 1; i < row_replicate_offset.size(); ++i)
            {
                const size_t left_offset = row_replicate_offset[i];
                const size_t rows = left_offset - prev_left_offset;
                if (rows)
                    new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows);
                prev_left_offset = left_offset;
            }
            executed_block.insert({std::move(new_col), src_col->type, col_name});
        }

        /// NOTE(review): this check appears unreachable, because an empty block already left the loop
        /// above; it is kept so the behaviour stays exactly as before.
        if (!executed_block)
        {
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}",
                required_cols.toString(),
                sample_right_block.dumpNames(),
                added_columns.left_block.dumpNames());
        }

        /// Every input must carry both a column and a type before the expression is executed.
        for (const auto & col : executed_block.getColumnsWithTypeAndName())
            if (!col.column || !col.type)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure());

        added_columns.additional_filter_expression->execute(executed_block);
        /// The result is read from position 0 of the executed block.
        result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst();
        executed_block.clear();
    } while (false);

    result_column = result_column->convertToFullIfNeeded();
    if (result_column->isNullable())
    {
        /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros
        /// Trying to avoid copying data, since we are the only owner of the column.
        ColumnPtr mask_column = assert_cast<const ColumnNullable &>(*result_column).getNullMapColumnPtr();

        MutableColumnPtr mutable_column;
        {
            ColumnPtr nested_column = assert_cast<const ColumnNullable &>(*result_column).getNestedColumnPtr();
            /// Drop the reference held through the Nullable wrapper before mutating the nested column.
            result_column.reset();
            mutable_column = IColumn::mutate(std::move(nested_column));
        }

        auto & column_data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
        const auto & mask_column_data = assert_cast<const ColumnUInt8 &>(*mask_column).getData();
        for (size_t i = 0; i < column_data.size(); ++i)
        {
            if (mask_column_data[i])
                column_data[i] = 0;
        }
        return mutable_column;
    }
    return result_column;
}
/// Joins rows of the left block against the right-side hash maps when an additional filter
/// expression has to be evaluated on top of the hash-key match.
///
/// The left rows are processed in batches. For every batch:
///   1. `collect_keys_matched_rows_refs` looks up the keys and records every right row found for
///      each left row in `selected_rows`, without applying the filter;
///   2. `buildAdditionalFilter` evaluates the filter for all recorded candidate pairs;
///   3. `copy_final_matched_rows` appends the candidates that passed the filter to `added_columns`
///      and updates used flags, the left-row filter and the replication offsets.
///
/// key_getter_vector / mapv: one key getter and one map per entry of `added_columns.join_on_keys`.
/// need_filter: when true, `added_columns.filter` is (re)created with one zero entry per left row
///              and entries are set for the left rows selected below.
/// flag_per_row: selects per-right-row used flags instead of flags addressed via the find result.
///
/// Returns the number of left rows that were processed (`left_row_iter`); this can be smaller than
/// `added_columns.rows_to_add` when the loop stops early.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
template <typename KeyGetter, typename Map, typename AddedColumns>
size_t HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::joinRightColumnsWithAddtitionalFilter(
std::vector<KeyGetter> && key_getter_vector,
const std::vector<const Map *> & mapv,
AddedColumns & added_columns,
JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]],
bool need_filter [[maybe_unused]],
bool flag_per_row [[maybe_unused]])
{
constexpr JoinFeatures<KIND, STRICTNESS, MapsTemplate> join_features;
size_t left_block_rows = added_columns.rows_to_add;
if (need_filter)
added_columns.filter = IColumn::Filter(left_block_rows, 0);
std::unique_ptr<Arena> pool;
if constexpr (join_features.need_replication)
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(left_block_rows);
/// Per-batch cumulative candidate counts: element 0 is 0, element i is the number of candidates
/// recorded after the i-th left row of the current batch.
std::vector<size_t> row_replicate_offset;
row_replicate_offset.reserve(left_block_rows);
using FindResult = typename KeyGetter::FindResult;
size_t max_joined_block_rows = added_columns.max_joined_block_rows;
/// Index of the next left row to process; shared between the batches.
size_t left_row_iter = 0;
PreSelectedRows selected_rows;
selected_rows.reserve(left_block_rows);
std::vector<FindResult> find_results;
find_results.reserve(left_block_rows);
bool exceeded_max_block_rows = false;
/// total_added_rows counts rows appended to the result over all batches;
/// current_added_rows counts the candidates recorded in the current batch only.
IColumn::Offset total_added_rows = 0;
IColumn::Offset current_added_rows = 0;
/// Step 1: reset the per-batch state and record the candidate right rows for consecutive left rows,
/// starting at left_row_iter.
auto collect_keys_matched_rows_refs = [&]()
{
pool = std::make_unique<Arena>();
find_results.clear();
row_replicate_offset.clear();
row_replicate_offset.push_back(0);
current_added_rows = 0;
selected_rows.clear();
for (; left_row_iter < left_block_rows; ++left_row_iter)
{
if constexpr (join_features.need_replication)
{
/// Close the batch once rows already added plus candidates of this batch reach the limit.
if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows))
{
break;
}
}
KnownRowsHolder<true> all_flag_known_rows;
KnownRowsHolder<false> single_flag_know_rows;
for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx)
{
const auto & join_keys = added_columns.join_on_keys[join_clause_idx];
/// Skip this clause for left rows flagged in its null map.
if (join_keys.null_map && (*join_keys.null_map)[left_row_iter])
continue;
bool row_acceptable = !join_keys.isRowFiltered(left_row_iter);
auto find_result = row_acceptable
? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool)
: FindResult();
if (find_result.isFound())
{
auto & mapped = find_result.getMapped();
find_results.push_back(find_result);
/// Unlike `joinRightColumns`, missing rows are not added by addFoundRowAll here;
/// they are added after the filter has been applied.
if (flag_per_row)
addFoundRowAll<Map, false, true>(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr);
else
addFoundRowAll<Map, false, false>(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr);
}
}
/// One entry per processed left row, also when it produced no candidates.
row_replicate_offset.push_back(current_added_rows);
}
};
/// Step 3: walk the candidates of the batch left row by left row and emit those whose filter flag is set.
/// `selected_right_row_it` is advanced in step with `replicated_row`, so that it refers to the
/// candidate the current filter flag belongs to.
auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col)
{
const PaddedPODArray<UInt8> & filter_flags = assert_cast<const ColumnUInt8 &>(*filter_col).getData();
size_t prev_replicated_row = 0;
auto selected_right_row_it = selected_rows.begin();
size_t find_result_index = 0;
for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i)
{
bool any_matched = false;
/// Right/full join or multiple disjuncts: the used flag has to be marked for each row.
if (flag_per_row)
{
for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row)
{
if (filter_flags[replicated_row])
{
if constexpr (join_features.is_semi_join || join_features.is_any_join)
{
/// For LEFT/INNER SEMI/ANY JOIN, only the first matching row is added for each left row.
if constexpr (join_features.left || join_features.inner)
{
if (!any_matched)
{
/// For inner join, each right row's flag is marked because each right row is used only once.
auto used_once = used_flags.template setUsedOnce<join_features.need_flags, true>(
selected_right_row_it->block, selected_right_row_it->row_num, 0);
if (used_once)
{
any_matched = true;
total_added_rows += 1;
added_columns.appendFromBlock(
*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing);
}
}
}
else
{
/// Other join kinds: no first-match guard, a right row is emitted whenever setUsedOnce returns true for it.
auto used_once = used_flags.template setUsedOnce<join_features.need_flags, true>(
selected_right_row_it->block, selected_right_row_it->row_num, 0);
if (used_once)
{
any_matched = true;
total_added_rows += 1;
added_columns.appendFromBlock(
*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing);
}
}
}
else if constexpr (join_features.is_anti_join)
{
/// Anti join: nothing is appended here; only the match (and, for right joins, the used flag) is recorded.
any_matched = true;
if constexpr (join_features.right && join_features.need_flags)
used_flags.template setUsed<true, true>(selected_right_row_it->block, selected_right_row_it->row_num, 0);
}
else
{
any_matched = true;
total_added_rows += 1;
added_columns.appendFromBlock(
*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing);
used_flags.template setUsed<join_features.need_flags, true>(
selected_right_row_it->block, selected_right_row_it->row_num, 0);
}
}
++selected_right_row_it;
}
}
else
{
for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row)
{
if constexpr (join_features.is_anti_join)
{
/// Only whether any candidate passed matters; the row iterator is not used in this branch.
any_matched |= filter_flags[replicated_row];
}
else if constexpr (join_features.need_replication)
{
/// Replicating joins emit every candidate that passed the filter.
if (filter_flags[replicated_row])
{
any_matched = true;
added_columns.appendFromBlock(
*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing);
total_added_rows += 1;
}
++selected_right_row_it;
}
else
{
/// Without replication only the first passing candidate is emitted.
if (filter_flags[replicated_row])
{
any_matched = true;
added_columns.appendFromBlock(
*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing);
total_added_rows += 1;
/// Jump over the remaining candidates of this left row.
selected_right_row_it = selected_right_row_it + row_replicate_offset[i] - replicated_row;
break;
}
else
++selected_right_row_it;
}
}
}
if constexpr (join_features.is_anti_join)
{
/// Anti join: act on left rows for which no candidate passed the filter.
if (!any_matched)
{
if constexpr (join_features.left)
if (need_filter)
setUsed<true>(added_columns.filter, left_start_row + i - 1);
addNotFoundRow<join_features.add_missing, join_features.need_replication>(added_columns, total_added_rows);
}
}
else
{
if (!any_matched)
{
addNotFoundRow<join_features.add_missing, join_features.need_replication>(added_columns, total_added_rows);
}
else
{
if (!flag_per_row)
used_flags.template setUsed<join_features.need_flags, false>(find_results[find_result_index]);
if (need_filter)
setUsed<true>(added_columns.filter, left_start_row + i - 1);
if constexpr (join_features.add_missing)
added_columns.applyLazyDefaults();
}
}
/// Advance only when this left row had at least one candidate.
/// NOTE(review): this relies on one find_results entry per such left row when !flag_per_row - confirm.
find_result_index += (prev_replicated_row != row_replicate_offset[i]);
if constexpr (join_features.need_replication)
{
(*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows;
}
prev_replicated_row = row_replicate_offset[i];
}
};
while (left_row_iter < left_block_rows && !exceeded_max_block_rows)
{
auto left_start_row = left_row_iter;
collect_keys_matched_rows_refs();
/// Sanity check: one selected row per counted candidate, and one offset per processed left row plus the leading 0.
if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, "
"left_start_row: {}",
selected_rows.size(),
current_added_rows,
row_replicate_offset.size(),
left_row_iter,
left_start_row);
}
/// Step 2: evaluate the additional filter for all candidates of the batch, then emit the survivors.
auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns);
copy_final_matched_rows(left_start_row, filter_col);
if constexpr (join_features.need_replication)
{
/// Also stop when the batch had fewer than 1024 candidates, to avoid running the filter expression on very small batches.
if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024)
exceeded_max_block_rows = true;
}
}
if constexpr (join_features.need_replication)
{
/// Shrink to the number of left rows actually processed.
added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter);
added_columns.filter.resize_assume_reserved(left_row_iter);
}
added_columns.applyLazyDefaults();
return left_row_iter;
}
/// Splits `block` in place after its first `num_rows` rows.
/// On return `block` holds rows [0, num_rows) and the returned block holds the remaining rows.
/// If `block` has no more than `num_rows` rows, it is left untouched and an empty Block is returned.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
Block HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::sliceBlock(Block & block, size_t num_rows)
{
    const size_t rows_in_block = block.rows();
    if (rows_in_block <= num_rows)
        return {};

    const size_t tail_rows = rows_in_block - num_rows;
    Block tail = block.cloneEmpty();

    for (size_t pos = 0; pos < block.columns(); ++pos)
    {
        auto & head_column = block.getByPosition(pos);
        auto & tail_column = tail.getByPosition(pos);

        /// Cut the tail first: it is taken from the still untruncated source column.
        tail_column.column = head_column.column->cut(num_rows, tail_rows);
        head_column.column = head_column.column->cut(0, num_rows);
    }

    return tail;
}
/// Builds the right-side key column out of the corresponding left key column.
///
/// The copy is renamed to `renamed_right_column`, optionally filtered through `null_map_filter`
/// (via JoinCommon::filterWithBlanks), has its nullability adjusted to that of `right_key_type`,
/// is cast to `right_key_type` when the types differ, and is finally converted to a full
/// (non-constant) column.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
ColumnWithTypeAndName HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::copyLeftKeyColumnToRight(
    const DataTypePtr & right_key_type,
    const String & renamed_right_column,
    const ColumnWithTypeAndName & left_column,
    const IColumn::Filter * null_map_filter)
{
    ColumnWithTypeAndName result = left_column;
    result.name = renamed_right_column;

    const bool target_is_nullable = isNullableOrLowCardinalityNullable(right_key_type);

    if (null_map_filter)
    {
        result.column = JoinCommon::filterWithBlanks(result.column, *null_map_filter);
        correctNullabilityInplace(result, target_is_nullable, *null_map_filter);
    }
    else
    {
        correctNullabilityInplace(result, target_is_nullable);
    }

    const bool type_differs = !result.type->equals(*right_key_type);
    if (type_differs)
    {
        result.column = castColumnAccurate(result, right_key_type);
        result.type = right_key_type;
    }

    result.column = result.column->convertToFullColumnIfConst();
    return result;
}
/// Adjusts the nullability of `column` in place.
/// nullable == true:  the column is converted to Nullable.
/// nullable == false: nullability is removed; if the column is currently a ColumnNullable,
///                    it is first passed through JoinCommon::filterWithBlanks with its own null map.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
void HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable)
{
    if (nullable)
    {
        JoinCommon::convertColumnToNullable(column);
        return;
    }

    /// Values hidden behind NULLs must be replaced with defaults before the null map is dropped.
    const auto * as_nullable = column.column ? checkAndGetColumn<ColumnNullable>(&*column.column) : nullptr;
    if (as_nullable)
        column.column = JoinCommon::filterWithBlanks(column.column, as_nullable->getNullMapColumn().getData(), true);

    JoinCommon::removeColumnNullability(column);
}
/// Adjusts the nullability of `column` in place, additionally applying `negative_null_map`.
/// nullable == false: nullability is removed and the map is not used.
/// nullable == true:  the column is converted to Nullable; if its type ended up Nullable and the
///                    map is not empty, the map is applied through ColumnNullable::applyNegatedNullMap.
template <JoinKind KIND, JoinStrictness STRICTNESS, typename MapsTemplate>
void HashJoinMethods<KIND, STRICTNESS, MapsTemplate>::correctNullabilityInplace(
    ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map)
{
    if (!nullable)
    {
        JoinCommon::removeColumnNullability(column);
        return;
    }

    JoinCommon::convertColumnToNullable(column);

    if (!column.type->isNullable() || negative_null_map.empty())
        return;

    /// Take a mutable version of the column to patch its null map, then put it back.
    MutableColumnPtr patched = IColumn::mutate(std::move(column.column));
    assert_cast<ColumnNullable &>(*patched).applyNegatedNullMap(negative_null_map);
    column.column = std::move(patched);
}
}

View File

@ -1,10 +1,11 @@
#include <Interpreters/HashJoin/HashJoinMethods.h>
#include <Interpreters/HashJoin/HashJoinMethodsImpl.h>
namespace DB
{
template class HashJoinMethods<JoinKind::Inner, JoinStrictness::RightAny, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Any, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Any, HashJoin::MapsAll>;
template class HashJoinMethods<JoinKind::Inner, JoinStrictness::All, HashJoin::MapsAll>;
template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Semi, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Inner, JoinStrictness::Anti, HashJoin::MapsOne>;

View File

@ -3,15 +3,15 @@
#include <Interpreters/joinDispatch.h>
namespace DB
{
template <JoinKind KIND, JoinStrictness STRICTNESS>
template <JoinKind KIND, JoinStrictness STRICTNESS, typename Map>
struct JoinFeatures
{
static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any;
static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left);
static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All;
static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi;
static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti;
static constexpr bool is_any_or_semi_join = is_any_join || STRICTNESS == JoinStrictness::RightAny || (is_semi_join && KIND == JoinKind::Left);
static constexpr bool left = KIND == JoinKind::Left;
static constexpr bool right = KIND == JoinKind::Right;
@ -22,7 +22,8 @@ struct JoinFeatures
static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left));
static constexpr bool add_missing = (left || full) && !is_semi_join;
static constexpr bool need_flags = MapGetter<KIND, STRICTNESS>::flagged;
static constexpr bool need_flags = MapGetter<KIND, STRICTNESS, std::is_same_v<std::decay_t<Map>, HashJoin::MapsAll>>::flagged;
static constexpr bool is_maps_all = std::is_same_v<std::decay_t<Map>, HashJoin::MapsAll>;
};
}

View File

@ -26,10 +26,10 @@ public:
/// Update size for vector with flags.
/// Calling this method invalidates existing flags.
/// It can be called several times, but all of them should happen before using this structure.
template <JoinKind KIND, JoinStrictness STRICTNESS>
template <JoinKind KIND, JoinStrictness STRICTNESS, bool prefer_use_maps_all>
void reinit(size_t size)
{
if constexpr (MapGetter<KIND, STRICTNESS>::flagged)
if constexpr (MapGetter<KIND, STRICTNESS, prefer_use_maps_all>::flagged)
{
assert(flags[nullptr].size() <= size);
need_flags = true;
@ -43,10 +43,10 @@ public:
}
}
template <JoinKind KIND, JoinStrictness STRICTNESS>
template <JoinKind KIND, JoinStrictness STRICTNESS, bool prefer_use_maps_all>
void reinit(const Block * block_ptr)
{
if constexpr (MapGetter<KIND, STRICTNESS>::flagged)
if constexpr (MapGetter<KIND, STRICTNESS, prefer_use_maps_all>::flagged)
{
assert(flags[block_ptr].size() <= block_ptr->rows());
need_flags = true;
@ -148,6 +148,31 @@ public:
}
}
/// Tries to claim a used flag exactly once.
/// With flag_per_row the flag is addressed by (block, row_num); otherwise by `offset` in the
/// flag vector stored under the nullptr key.
/// Returns true if this call switched the flag from unset to set, false if it was already set.
/// When use_flags is false, no flag is touched and true is returned.
template <bool use_flags, bool flag_per_row>
bool setUsedOnce(const Block * block, size_t row_num, size_t offset)
{
    if constexpr (!use_flags)
        return true;

    if constexpr (flag_per_row)
    {
        auto & row_flag = flags[block][row_num];

        /// Cheap relaxed read first, so the seq_cst CAS is only attempted on flags that look unset.
        if (row_flag.load(std::memory_order_relaxed))
            return false;

        bool unset = false;
        return row_flag.compare_exchange_strong(unset, true);
    }
    else
    {
        auto & slot_flag = flags[nullptr][offset];

        /// Cheap relaxed read first, so the seq_cst CAS is only attempted on flags that look unset.
        if (slot_flag.load(std::memory_order_relaxed))
            return false;

        bool unset = false;
        return slot_flag.compare_exchange_strong(unset, true);
    }
}
};
}

View File

@ -1,11 +1,14 @@
#include <Interpreters/HashJoin/HashJoinMethods.h>
#include <Interpreters/HashJoin/HashJoinMethodsImpl.h>
namespace DB
{
template class HashJoinMethods<JoinKind::Left, JoinStrictness::RightAny, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Any, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Any, HashJoin::MapsAll>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::All, HashJoin::MapsAll>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Semi, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Semi, HashJoin::MapsAll>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Anti, HashJoin::MapsOne>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Anti, HashJoin::MapsAll>;
template class HashJoinMethods<JoinKind::Left, JoinStrictness::Asof, HashJoin::MapsAsof>;
}

View File

@ -1,4 +1,4 @@
#include <Interpreters/HashJoin/HashJoinMethods.h>
#include <Interpreters/HashJoin/HashJoinMethodsImpl.h>
namespace DB
{

View File

@ -26,11 +26,9 @@ static ColumnPtr castColumn(CastType cast_type, const ColumnWithTypeAndName & ar
""
}
};
auto get_cast_func = [cast_type, &arguments]
auto get_cast_func = [from = arg, to = type, cast_type]
{
FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(cast_type, {});
return func_builder_cast->build(arguments);
return createInternalCast(from, to, cast_type, {});
};
FunctionBasePtr func_cast = cache ? cache->getOrSet(cast_type, from_name, to_name, std::move(get_cast_func)) : get_cast_func();

View File

@ -12,38 +12,53 @@
namespace DB
{
template <JoinKind kind, JoinStrictness join_strictness>
/// HashJoin::MapsOne is more efficient: it stores only one row for each key in the map. It is recommended to use it whenever possible.
/// When only one row from the right table needs to be matched, use HashJoin::MapsOne. For example, LEFT ANY/SEMI/ANTI.
///
/// HashJoin::MapsAll stores all rows for each key in the map. It is used when multiple rows from the right table need to be matched.
/// For example, LEFT ALL, INNER ALL, RIGHT ALL/ANY.
///
/// prefer_use_maps_all is true when the join condition mixes equality and inequality conditions. For example, `t1.a = t2.a AND t1.b > t2.b`.
/// In this case, we need to use HashJoin::MapsAll to store all rows for each key in the map. We select all matched rows from the map
/// and then filter them by `t1.b > t2.b`.
///
/// flagged indicates whether we need to store a flag for each row recording whether it has been used in the join. See JoinUsedFlags.h.
template <JoinKind kind, JoinStrictness join_strictness, bool prefer_use_maps_all>
struct MapGetter;
template <> struct MapGetter<JoinKind::Left, JoinStrictness::RightAny> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::RightAny> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Right, JoinStrictness::RightAny> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Full, JoinStrictness::RightAny> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Left, JoinStrictness::RightAny, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Inner, JoinStrictness::RightAny, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Right, JoinStrictness::RightAny, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Full, JoinStrictness::RightAny, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Any> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::Any> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Right, JoinStrictness::Any> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Full, JoinStrictness::Any> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Any, false> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Any, true> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::Any, true> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::Any, false> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Right, JoinStrictness::Any, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Full, JoinStrictness::Any, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::All> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::All> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Right, JoinStrictness::All> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Full, JoinStrictness::All> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Left, JoinStrictness::All, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Inner, JoinStrictness::All, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Right, JoinStrictness::All, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Full, JoinStrictness::All, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
/// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation.
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Semi> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::Semi> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Right, JoinStrictness::Semi> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Full, JoinStrictness::Semi> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Semi, false> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Semi, true> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Inner, JoinStrictness::Semi, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Right, JoinStrictness::Semi, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Full, JoinStrictness::Semi, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
/// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation.
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Anti> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Inner, JoinStrictness::Anti> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Right, JoinStrictness::Anti> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <> struct MapGetter<JoinKind::Full, JoinStrictness::Anti> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
/// Only ANTI LEFT and ANTI RIGHT are valid. INNER and FULL are here for templates instantiation.
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Anti, false> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <> struct MapGetter<JoinKind::Left, JoinStrictness::Anti, true> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Inner, JoinStrictness::Anti, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Right, JoinStrictness::Anti, prefer_use_maps_all> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
template <bool prefer_use_maps_all> struct MapGetter<JoinKind::Full, JoinStrictness::Anti, prefer_use_maps_all> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
template <JoinKind kind>
struct MapGetter<kind, JoinStrictness::Asof> { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; };
template <JoinKind kind, bool prefer_use_maps_all>
struct MapGetter<kind, JoinStrictness::Asof, prefer_use_maps_all> { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; };
static constexpr std::array<JoinStrictness, 6> STRICTNESSES = {
JoinStrictness::RightAny,
@ -62,7 +77,7 @@ static constexpr std::array<JoinKind, 4> KINDS = {
};
/// Init specified join map
inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin::MapsVariant & maps)
inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin::MapsVariant & maps, bool prefer_use_maps_all = false)
{
return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij)
{
@ -70,7 +85,10 @@ inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin:
constexpr auto j = ij % STRICTNESSES.size();
if (kind == KINDS[i] && strictness == STRICTNESSES[j])
{
maps = typename MapGetter<KINDS[i], STRICTNESSES[j]>::Map();
if (prefer_use_maps_all)
maps = typename MapGetter<KINDS[i], STRICTNESSES[j], true>::Map();
else
maps = typename MapGetter<KINDS[i], STRICTNESSES[j], false>::Map();
return true;
}
return false;
@ -79,7 +97,7 @@ inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin:
/// Call function on specified join map
template <typename MapsVariant, typename Func>
inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & maps, Func && func)
inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & maps, bool prefer_use_maps_all, Func && func)
{
return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij)
{
@ -89,10 +107,16 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant &
constexpr auto j = ij % STRICTNESSES.size();
if (kind == KINDS[i] && strictness == STRICTNESSES[j])
{
func(
std::integral_constant<JoinKind, KINDS[i]>(),
std::integral_constant<JoinStrictness, STRICTNESSES[j]>(),
std::get<typename MapGetter<KINDS[i], STRICTNESSES[j]>::Map>(maps));
if (prefer_use_maps_all)
func(
std::integral_constant<JoinKind, KINDS[i]>(),
std::integral_constant<JoinStrictness, STRICTNESSES[j]>(),
std::get<typename MapGetter<KINDS[i], STRICTNESSES[j], true>::Map>(maps));
else
func(
std::integral_constant<JoinKind, KINDS[i]>(),
std::integral_constant<JoinStrictness, STRICTNESSES[j]>(),
std::get<typename MapGetter<KINDS[i], STRICTNESSES[j], false>::Map>(maps));
return true;
}
return false;
@ -101,7 +125,7 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant &
/// Call function on specified join map
template <typename MapsVariant, typename Func>
inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector<const MapsVariant *> & mapsv, Func && func)
inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector<const MapsVariant *> & mapsv, bool prefer_use_maps_all, Func && func)
{
return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij)
{
@ -111,17 +135,31 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector<c
constexpr auto j = ij % STRICTNESSES.size();
if (kind == KINDS[i] && strictness == STRICTNESSES[j])
{
using MapType = typename MapGetter<KINDS[i], STRICTNESSES[j]>::Map;
std::vector<const MapType *> v;
v.reserve(mapsv.size());
for (const auto & el : mapsv)
v.push_back(&std::get<MapType>(*el));
if (prefer_use_maps_all)
{
using MapType = typename MapGetter<KINDS[i], STRICTNESSES[j], true>::Map;
std::vector<const MapType *> v;
v.reserve(mapsv.size());
for (const auto & el : mapsv)
v.push_back(&std::get<MapType>(*el));
func(
std::integral_constant<JoinKind, KINDS[i]>(),
std::integral_constant<JoinStrictness, STRICTNESSES[j]>(),
v
/*std::get<typename MapGetter<KINDS[i], STRICTNESSES[j]>::Map>(maps)*/);
func(
std::integral_constant<JoinKind, KINDS[i]>(), std::integral_constant<JoinStrictness, STRICTNESSES[j]>(), v
/*std::get<typename MapGetter<KINDS[i], STRICTNESSES[j]>::Map>(maps)*/);
}
else
{
using MapType = typename MapGetter<KINDS[i], STRICTNESSES[j], false>::Map;
std::vector<const MapType *> v;
v.reserve(mapsv.size());
for (const auto & el : mapsv)
v.push_back(&std::get<MapType>(*el));
func(
std::integral_constant<JoinKind, KINDS[i]>(), std::integral_constant<JoinStrictness, STRICTNESSES[j]>(), v
/*std::get<typename MapGetter<KINDS[i], STRICTNESSES[j]>::Map>(maps)*/);
}
return true;
}
return false;

View File

@ -255,20 +255,13 @@ static void appendAggregateFunctions(
const auto * node = input;
if (node->result_name != aggregate.column_name)
{
if (DataTypeAggregateFunction::strictEquals(type, node->result_type))
{
node = &proj_dag.addAlias(*node, aggregate.column_name);
}
else
{
/// Cast to aggregate types specified in query if it's not
/// strictly the same as the one specified in projection. This
/// is required to generate correct results during finalization.
node = &proj_dag.addCast(*node, type, aggregate.column_name);
}
}
if (!DataTypeAggregateFunction::strictEquals(type, node->result_type))
/// Cast to aggregate types specified in query if it's not
/// strictly the same as the one specified in projection. This
/// is required to generate correct results during finalization.
node = &proj_dag.addCast(*node, type, aggregate.column_name);
else if (node->result_name != aggregate.column_name)
node = &proj_dag.addAlias(*node, aggregate.column_name);
proj_dag_outputs.push_back(node);
}

View File

@ -8,6 +8,7 @@
#include <Processors/QueryPlan/JoinStep.h>
#include <Processors/QueryPlan/LimitByStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/OffsetStep.h>
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/QueryPlanVisitor.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
@ -59,9 +60,10 @@ public:
if (typeid_cast<LimitStep *>(current_step)
|| typeid_cast<LimitByStep *>(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable
|| typeid_cast<FillingStep *>(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable
|| typeid_cast<SortingStep *>(current_step) /// (3) ORDER BY will change order of previous sorting
|| typeid_cast<AggregatingStep *>(current_step)) /// (4) aggregation change order
|| typeid_cast<OffsetStep *>(current_step) /// (2) OFFSET on top of ORDER BY, the ORDER BY is non-removable
|| typeid_cast<FillingStep *>(current_step) /// (3) if ORDER BY is with FILL WITH, it is non-removable
|| typeid_cast<SortingStep *>(current_step) /// (4) ORDER BY will change order of previous sorting
|| typeid_cast<AggregatingStep *>(current_step)) /// (5) aggregation change order
{
logStep("nodes_affect_order/push", current_node);
nodes_affect_order.push_back(current_node);

View File

@ -8,13 +8,15 @@
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <QueryPipeline/Pipe.h>
#include <Processors/ISimpleTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Interpreters/Context.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/ISimpleTransform.h>
#include <QueryPipeline/Pipe.h>
#include <boost/circular_buffer.hpp>
#include <ranges>
namespace DB
{
@ -68,11 +70,17 @@ static void makeFdBlocking(int fd)
static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds)
{
auto logger = getLogger("TimeoutReadBufferFromFileDescriptor");
auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); };
int res;
while (true)
{
Stopwatch watch;
LOG_TEST(logger, "Polling descriptors: {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "));
res = poll(pfds, static_cast<nfds_t>(num), static_cast<int>(timeout_milliseconds));
if (res < 0)
@ -82,7 +90,10 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond
const auto elapsed = watch.elapsedMilliseconds();
if (timeout_milliseconds <= elapsed)
{
LOG_TEST(logger, "Timeout exceeded: elapsed={}, timeout={}", elapsed, timeout_milliseconds);
break;
}
timeout_milliseconds -= elapsed;
}
else
@ -91,6 +102,12 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond
}
}
LOG_TEST(
logger,
"Poll for descriptors: {} returned {}",
fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "),
res);
return res;
}
@ -200,12 +217,6 @@ public:
return true;
}
void reset() const
{
makeFdBlocking(stdout_fd);
makeFdBlocking(stderr_fd);
}
~TimeoutReadBufferFromFileDescriptor() override
{
tryMakeFdBlocking(stdout_fd);

View File

@ -2337,22 +2337,9 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
argument_types[2]->getName());
}
const auto from_name = argument_types[2]->getName();
const auto to_name = argument_types[0]->getName();
ColumnsWithTypeAndName arguments
auto get_cast_func = [from = argument_types[2], to = argument_types[0]]
{
{ argument_types[2], "" },
{
DataTypeString().createColumnConst(0, to_name),
std::make_shared<DataTypeString>(),
""
}
};
auto get_cast_func = [&arguments]
{
FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::accurate, {});
return func_builder_cast->build(arguments);
return createInternalCast({from, {}}, to, CastType::accurate, {});
};
func_cast = get_cast_func();

View File

@ -749,8 +749,16 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
/// Probably there is something wrong with files of this part.
/// So it can be helpful to add to the error message some information about those files.
String files_in_part;
for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next())
files_in_part += fmt::format("{}{} ({} bytes)", (files_in_part.empty() ? "" : ", "), it->name(), getDataPartStorage().getFileSize(it->name()));
{
std::string file_info;
if (!getDataPartStorage().isDirectory(it->name()))
file_info = fmt::format(" ({} bytes)", getDataPartStorage().getFileSize(it->name()));
files_in_part += fmt::format("{}{}{}", (files_in_part.empty() ? "" : ", "), it->name(), file_info);
}
if (!files_in_part.empty())
e->addMessage("Part contains files: {}", files_in_part);
if (isEmpty())
@ -2141,7 +2149,27 @@ void IMergeTreeDataPart::checkConsistencyBase() const
}
}
checksums.checkSizes(getDataPartStorage());
const auto & data_part_storage = getDataPartStorage();
for (const auto & [filename, checksum] : checksums.files)
{
try
{
checksum.checkSize(data_part_storage, filename);
}
catch (const Exception & ex)
{
/// For projection parts check will mark them broken in loadProjections
if (!parent_part && filename.ends_with(".proj"))
{
std::string projection_name = fs::path(filename).stem();
LOG_INFO(storage.log, "Projection {} doesn't exist on start for part {}, marking it as broken", projection_name, name);
if (hasProjection(projection_name))
markProjectionPartAsBroken(projection_name, ex.message(), ex.code());
}
else
throw;
}
}
}
else
{

View File

@ -1956,11 +1956,8 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
auto common_type_maybe_nullable = (key_expr_type_is_nullable && !common_type->isNullable())
? DataTypePtr(std::make_shared<DataTypeNullable>(common_type))
: common_type;
ColumnsWithTypeAndName arguments{
{nullptr, key_expr_type, ""},
{DataTypeString().createColumnConst(1, common_type_maybe_nullable->getName()), common_type_maybe_nullable, ""}};
FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
auto func_cast = func_builder_cast->build(arguments);
auto func_cast = createInternalCast({key_expr_type, {}}, common_type_maybe_nullable, CastType::nonAccurate, {});
/// If we know the given range only contains one value, then we treat all functions as positive monotonic.
if (!single_point && !func_cast->hasInformationAboutMonotonicity())

View File

@ -1146,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr);
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false);
if (!filter_dag)
return {};
@ -6932,7 +6932,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
const auto * predicate = filter_dag->getOutputs().at(0);
// Generate valid expressions for filtering
VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context);
VirtualColumnUtils::filterBlockWithPredicate(
predicate, virtual_columns_block, query_context, /*allow_filtering_with_partial_predicate =*/true);
rows = virtual_columns_block.rows();
part_name_column = virtual_columns_block.getByName("_part").column;

View File

@ -100,12 +100,6 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
}
}
void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const
{
for (const auto & [name, checksum] : files)
checksum.checkSize(storage, name);
}
UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const
{
UInt64 res = 0;

View File

@ -65,9 +65,6 @@ struct MergeTreeDataPartChecksums
static bool isBadChecksumsErrorCode(int code);
/// Checks that the directory contains all the needed files of the correct size. Does not check the checksum.
void checkSizes(const IDataPartStorage & storage) const;
/// Returns false if the checksum is too old.
bool read(ReadBuffer & in);
/// Assume that header with version (the first line) is read

View File

@ -152,23 +152,15 @@ const ActionsDAG::Node & addFunction(
/// Adds CAST(node_to_cast AS to_type) to `dag`.
///
/// If `node_to_cast` already has exactly the type `to_type`, no node is added and
/// `node_to_cast` itself is returned. Otherwise the new cast node is registered in
/// `node_remap` under its result name (so later lookups by name resolve to it) and returned.
const ActionsDAG::Node & addCast(
    const ActionsDAGPtr & dag,
    const ActionsDAG::Node & node_to_cast,
    const DataTypePtr & to_type,
    OriginalToNewNodeMap & node_remap)
{
    /// Nothing to do when the types already match: skip the redundant cast.
    /// (The check must NOT be negated, otherwise the cast is skipped exactly when it is required.)
    if (node_to_cast.result_type->equals(*to_type))
        return node_to_cast;

    const auto & new_node = dag->addCast(node_to_cast, to_type, {});
    node_remap[new_node.result_name] = {dag.get(), &new_node};
    return new_node;
}
/// Normalizes the filter node by adding AND with a constant true.
@ -332,7 +324,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
/// Build AND(last_step_result_node, true)
const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap);
/// Build CAST(and_node, type of PREWHERE column)
const auto & cast_node = addCast(last_step_dag, and_node, output->result_type->getName(), node_remap);
const auto & cast_node = addCast(last_step_dag, and_node, output->result_type, node_remap);
/// Add alias for the result with the name of the PREWHERE column
const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name);
last_step_dag->addOrReplaceInOutputs(prewhere_result_node);

View File

@ -546,7 +546,11 @@ protected:
return {};
Chunk chunk;
if (!joinDispatch(join->kind, join->strictness, join->data->maps.front(),
if (!joinDispatch(
join->kind,
join->strictness,
join->data->maps.front(),
join->table_join->getMixedJoinExpression() != nullptr,
[&](auto kind, auto strictness, auto & map) { chunk = createChunk<kind, strictness>(map); }))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness");
return chunk;

View File

@ -21,7 +21,7 @@ const char * auto_config_build[]
"BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@",
"USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@",
"USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@",
"USE_JEMALLOC", "@ENABLE_JEMALLOC@",
"USE_JEMALLOC", "@USE_JEMALLOC@",
"USE_ICU", "@USE_ICU@",
"USE_H3", "@USE_H3@",
"USE_MYSQL", "@USE_MYSQL@",
@ -36,7 +36,7 @@ const char * auto_config_build[]
"USE_SSL", "@USE_SSL@",
"OPENSSL_VERSION", "@OPENSSL_VERSION@",
"OPENSSL_IS_BORING_SSL", "@OPENSSL_IS_BORING_SSL@",
"USE_VECTORSCAN", "@ENABLE_VECTORSCAN@",
"USE_VECTORSCAN", "@USE_VECTORSCAN@",
"USE_SIMDJSON", "@USE_SIMDJSON@",
"USE_ODBC", "@USE_ODBC@",
"USE_GRPC", "@USE_GRPC@",
@ -62,8 +62,8 @@ const char * auto_config_build[]
"USE_ARROW", "@USE_ARROW@",
"USE_ORC", "@USE_ORC@",
"USE_MSGPACK", "@USE_MSGPACK@",
"USE_QPL", "@ENABLE_QPL@",
"USE_QAT", "@ENABLE_QATLIB@",
"USE_QPL", "@USE_QPL@",
"USE_QATLIB", "@USE_QATLIB@",
"GIT_HASH", "@GIT_HASH@",
"GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4",
"GIT_DATE", "@GIT_DATE@",

View File

@ -1,39 +1,43 @@
#include <memory>
#include <stack>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnSet.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/FilterDescription.h>
#include <Core/NamesAndTypes.h>
#include <Core/TypeId.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsLogical.h>
#include <Functions/IFunction.h>
#include <Functions/IFunctionAdaptors.h>
#include <Functions/indexHint.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/misc.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/FilterDescription.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Parsers/makeASTForLogicalFunction.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/Sinks/EmptySink.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Storages/VirtualColumnUtils.h>
#include <IO/WriteHelpers.h>
#include <Common/re2.h>
@ -337,9 +341,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node)
}
static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
const ActionsDAG::Node * node,
const Block * allowed_inputs,
ActionsDAG::Nodes & additional_nodes)
const ActionsDAG::Node * node, const Block * allowed_inputs, ActionsDAG::Nodes & additional_nodes, bool allow_partial_result)
{
if (node->type == ActionsDAG::ActionType::FUNCTION)
{
@ -348,8 +350,15 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto & node_copy = additional_nodes.emplace_back(*node);
node_copy.children.clear();
for (const auto * child : node->children)
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes))
if (const auto * child_copy
= splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result))
node_copy.children.push_back(child_copy);
/// Expression like (not_allowed AND allowed) is rejected as a whole if allow_partial_result = false. This is important for
/// trivial count optimization, otherwise we can get incorrect results. For example, if the query is
/// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply
/// trivial count.
else if (!allow_partial_result)
return nullptr;
if (node_copy.children.empty())
return nullptr;
@ -357,7 +366,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
if (node_copy.children.size() == 1)
{
const ActionsDAG::Node * res = node_copy.children.front();
/// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires
/// Expression like (not_allowed AND 256) can't be reduced to (and(256)) because AND requires
/// at least two arguments; also it can't be reduced to (256) because result type is different.
if (!res->result_type->equals(*node->result_type))
{
@ -375,7 +384,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
{
auto & node_copy = additional_nodes.emplace_back(*node);
for (auto & child : node_copy.children)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result); !child)
return nullptr;
return &node_copy;
@ -389,7 +398,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto index_hint_dag = index_hint->getActions().clone();
ActionsDAG::NodeRawConstPtrs atoms;
for (const auto & output : index_hint_dag.getOutputs())
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
if (const auto * child_copy
= splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_partial_result))
atoms.push_back(child_copy);
if (!atoms.empty())
@ -423,22 +433,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
return node;
}
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
std::optional<ActionsDAG>
splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result)
{
if (!predicate)
return {};
ActionsDAG::Nodes additional_nodes;
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes);
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_partial_result);
if (!res)
return {};
return ActionsDAG::cloneSubDAG({res}, true);
}
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
void filterBlockWithPredicate(
const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate)
{
auto dag = splitFilterDagForAllowedInputs(predicate, &block);
auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_partial_result=*/allow_filtering_with_partial_predicate);
if (dag)
filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block);
}

View File

@ -27,9 +27,13 @@ namespace VirtualColumnUtils
///
/// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed"
/// if there are subqueries.
///
/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
/// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate.
/// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs).
/// If allow_filtering_with_partial_predicate is true, then the filtering will be done even if some part of the predicate
/// cannot be evaluated using the columns from the block.
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate = true);
/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context);
/// Just filters block. Block should contain all the required columns.
ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context);
@ -42,7 +46,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node);
/// Extract a part of predicate that can be evaluated using only columns from input_names.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
/// When allow_partial_result is false, then the result will be empty if any part of it cannot be evaluated deterministically
/// on the given inputs.
/// allow_partial_result must be false when we are going to use the result to filter parts in
/// MergeTreeData::totalRowsByPartitionPredicateImpl. For example, if the query is
/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1`
/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is
/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial
/// count optimization will be mistakenly applied to the query.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result = true);
/// Extract from the input stream a set of `name` column values
template <typename T>

View File

@ -135,6 +135,12 @@ endif()
if (TARGET ch_contrib::vectorscan)
set(USE_VECTORSCAN 1)
endif()
if (TARGET ch_contrib::qpl)
set(USE_QPL 1)
endif()
if (TARGET ch_contrib::qatlib)
set(USE_QATLIB 1)
endif()
if (TARGET ch_contrib::avrocpp)
set(USE_AVRO 1)
endif()
@ -161,8 +167,8 @@ endif()
if (TARGET ch_contrib::ssh)
set(USE_SSH 1)
endif()
if (TARGET ch_contrib::fiu)
set(FIU_ENABLE 1)
if (TARGET ch_contrib::libfiu)
set(USE_LIBFIU 1)
endif()
if (TARGET ch_contrib::libarchive)
set(USE_LIBARCHIVE 1)

View File

@ -985,6 +985,7 @@ def _run_test(job_name: str, run_command: str) -> int:
else:
print("Use run command from the workflow")
env["CHECK_NAME"] = job_name
env["MAX_RUN_TIME"] = str(timeout or 0)
print(f"Going to start run command [{run_command}]")
stopwatch = Stopwatch()
job_log = Path(TEMP_PATH) / "job_log.txt"

View File

@ -114,6 +114,9 @@ def get_run_command(
if flaky_check:
envs.append("-e NUM_TRIES=50")
envs.append("-e MAX_RUN_TIME=2800")
else:
max_run_time = os.getenv("MAX_RUN_TIME", "0")
envs.append(f"-e MAX_RUN_TIME={max_run_time}")
envs += [f"-e {e}" for e in additional_envs]

View File

@ -738,7 +738,7 @@ def create_test_html_report(
if test_results:
rows_part = []
num_fails = 0
has_test_time = False
has_test_time = any(tr.time is not None for tr in test_results)
has_log_urls = False
# Display entries with logs at the top (they correspond to failed tests)
@ -770,9 +770,11 @@ def create_test_html_report(
row.append(f'<td {fail_id}style="{style}">{test_result.status}</td>')
colspan += 1
if test_result.time is not None:
has_test_time = True
row.append(f"<td>{test_result.time}</td>")
if has_test_time:
if test_result.time is not None:
row.append(f"<td>{test_result.time}</td>")
else:
row.append("<td></td>")
colspan += 1
if test_result.log_urls is not None:

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Server config override: sets the merge_tree setting max_suspicious_broken_parts_bytes to 0.
NOTE(review): presumably this leaves no byte allowance for broken parts found at startup;
confirm the exact semantics against the MergeTree settings reference.
-->
<clickhouse>
<merge_tree>
<max_suspicious_broken_parts_bytes>0</max_suspicious_broken_parts_bytes>
</merge_tree>
</clickhouse>

View File

@ -4,6 +4,7 @@ import logging
import string
import random
from helpers.cluster import ClickHouseCluster
from multiprocessing.dummy import Pool
cluster = ClickHouseCluster(__file__)
@ -18,6 +19,12 @@ def cluster():
stay_alive=True,
with_zookeeper=True,
)
cluster.add_instance(
"node_restart",
main_configs=["config.d/dont_start_broken.xml"],
stay_alive=True,
with_zookeeper=True,
)
logging.info("Starting cluster...")
cluster.start()
@ -632,6 +639,49 @@ def test_broken_on_start(cluster):
check(node, table_name, 0)
def test_disappeared_projection_on_start(cluster):
    """Restart the server after a projection directory was removed from one part.

    Scenario: create four parts with merges stopped, start dropping `proj2`
    asynchronously, wait until `proj2` is gone from the table definition, remove
    the `proj2` data of part all_2_2_0, restart, and run the usual `check`.
    """
    node = cluster.instances["node_restart"]
    table_name = "test_disapperead_projection"

    create_table(node, table_name, 1)
    node.query(f"SYSTEM STOP MERGES {table_name}")

    # Four separate inserts of 5 rows each, one part per insert.
    for first_row in (0, 5, 10, 15):
        insert(node, table_name, first_row, 5)

    expected_parts = ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"]
    assert expected_parts == get_parts(node, table_name)

    def drop_projection():
        node.query(
            f"ALTER TABLE {table_name} DROP PROJECTION proj2",
            settings={"mutations_sync": "0"},
        )

    # Fire the ALTER in a background thread; its result is intentionally not awaited.
    pool = Pool(2)
    pool.apply_async(drop_projection)

    # Poll (up to 30 attempts, 0.5 s apart) until the projection leaves the table definition.
    for _ in range(30):
        create_query = node.query(f"SHOW CREATE TABLE {table_name}")
        if "proj2" not in create_query:
            break
        time.sleep(0.5)

    assert "proj2" not in create_query

    # Remove 'proj2' for part all_2_2_0
    break_projection(node, table_name, "proj2", "all_2_2_0", "part")

    node.restart_clickhouse()

    # proj2 is not broken, it doesn't exist, but ok
    check(node, table_name, 0, expect_broken_part="proj2", do_check_command=0)
def test_mutation_with_broken_projection(cluster):
node = cluster.instances["node"]

View File

@ -176,7 +176,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table):
select_handler = node.get_query_request(
f"""
SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0;
SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0, max_threads=1;
""",
query_id=query_id,
)

View File

@ -771,7 +771,11 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode):
table_name,
mode,
files_path,
additional_settings={"keeper_path": keeper_path, "s3queue_buckets": 2},
additional_settings={
"keeper_path": keeper_path,
"s3queue_buckets": 2,
**({"s3queue_processing_threads_num": 1} if mode == "ordered" else {}),
},
)
for instance in [node, node_2]:
@ -806,6 +810,10 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode):
list(map(int, l.split())) for l in run_query(node_2, get_query).splitlines()
]
logging.debug(
f"res1 size: {len(res1)}, res2 size: {len(res2)}, total_rows: {total_rows}"
)
assert len(res1) + len(res2) == total_rows
# Checking that all engines have made progress

View File

@ -0,0 +1,26 @@
<test>
<settings>
<max_insert_threads>4</max_insert_threads>
</settings>
<create_query>
CREATE TABLE t_subcolumns (a Array(UInt64), s Nullable(String), m Map(String, UInt64)) ENGINE = MergeTree ORDER BY tuple()
</create_query>
<fill_query>
INSERT INTO t_subcolumns SELECT range(number % 20), toString(number), mapFromArrays(range(number % 20), range(number % 20)) FROM numbers_mt(50000000)
</fill_query>
<fill_query>
OPTIMIZE TABLE t_subcolumns FINAL
</fill_query>
<query>SELECT count() FROM t_subcolumns WHERE NOT ignore(length(a))</query>
<query>SELECT count() FROM t_subcolumns WHERE notEmpty(a)</query>
<query>SELECT count() FROM t_subcolumns WHERE NOT ignore(length(m))</query>
<query>SELECT count() FROM t_subcolumns WHERE notEmpty(m)</query>
<query>SELECT count() FROM t_subcolumns WHERE isNotNull(s)</query>
<query>SELECT count(s) FROM t_subcolumns</query>
<drop_query>DROP TABLE t_subcolumns</drop_query>
</test>

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, no-fasttest, no-debug
# Tags: long, no-fasttest, no-debug, no-asan, no-msan, no-tsan
#
# Load all possible .parquet files found in submodules.

View File

@ -27,7 +27,7 @@ function wait_until()
function get_buffer_delay()
{
local buffer_insert_id=$1 && shift
query "SYSTEM FLUSH LOGS"
$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
query "
WITH
(SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_,

View File

@ -1,10 +1,16 @@
#!/usr/bin/env bash
# Tags: long
# Tags: long, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Send all arguments, joined into a single string, as the request body
# through ${CLICKHOUSE_CURL} to ${CLICKHOUSE_URL}.
# NOTE: database_atomic_wait_for_drop_and_detach_synchronously needed only for local env, CI has it ON
query() {
    local endpoint="${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1"
    ${CLICKHOUSE_CURL} -sS "${endpoint}" -d "$*"
}
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
verify_sql="SELECT
(SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics)
@ -18,13 +24,13 @@ verify()
{
for i in {1..5000}
do
result=$( $CLICKHOUSE_CLIENT --query="$verify_sql" )
result=$( query "$verify_sql" )
[ "$result" = "1" ] && echo "$result" && break
sleep 0.1
if [[ $i -eq 5000 ]]
then
$CLICKHOUSE_CLIENT "
query "
SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics;
SELECT sum(active), sum(NOT active) FROM system.parts;
SELECT sum(active), sum(NOT active) FROM system.projection_parts;
@ -34,17 +40,17 @@ verify()
done
}
$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table"
$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;"
query "DROP TABLE IF EXISTS test_table"
query "CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;"
$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')"
query "INSERT INTO test_table VALUES ('1992-01-01')"
verify
$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')"
query "INSERT INTO test_table VALUES ('1992-01-02')"
verify
$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL"
query "OPTIMIZE TABLE test_table FINAL"
verify
$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table"
query "DROP TABLE test_table"
verify

View File

@ -17,7 +17,7 @@ used_functions
['repeat']
arraySort(used_data_type_families)
['Array','Int32','Nullable','String']
['Int32','Nullable','String']
used_database_engines
['Atomic']

View File

@ -1,2 +1,3 @@
3
950 990 500 2000
[950] [999]

View File

@ -29,4 +29,6 @@ FROM cluster('test_cluster_two_shards', currentDatabase(), r)
WHERE a = 'x'
settings prefer_localhost_replica=0;
SELECT quantilesTimingMerge(0.95)(q), quantilesTimingMerge(toInt64(1))(q) FROM remote('127.0.0.{1,2}', currentDatabase(), r);
DROP TABLE r;

View File

@ -1,4 +1,5 @@
-- Tags: long
-- Tags: long, no-parallel
-- The no-parallel tag is set to keep this test from timing out
drop table if exists t;

View File

@ -1,4 +1,6 @@
Code: 159
0
Code: 159
query_duration 1
0
query_duration 1
Code: 159
0

View File

@ -1,27 +1,23 @@
#!/usr/bin/env bash
# Tags: no-debug
# no-debug: Query is canceled by timeout after max_execution_time,
# but sending an exception to the client may hang
# for more than MAX_PROCESS_WAIT seconds in a slow debug build,
# and test will fail.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
MAX_PROCESS_WAIT=5
IS_SANITIZER=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%'")
if [ "$IS_SANITIZER" -gt 0 ]; then
# Query may hang for more than 5 seconds, especially in tsan build
MAX_PROCESS_WAIT=15
TIMEOUT=5
IS_SANITIZER_OR_DEBUG=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%' or message like '%built in debug mode%'")
if [ "$IS_SANITIZER_OR_DEBUG" -gt 0 ]; then
# Increase the timeout, because in debug/sanitizer builds:
# - client is slow
# - stacktrace resolving is slow
TIMEOUT=15
fi
# TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor
### Should be cancelled after 1 second and return a 159 exception (timeout)
timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \
"SELECT * FROM
query_id=$(random_str 12)
$CLICKHOUSE_CLIENT --query_id "$query_id" --max_execution_time 1 -q "
SELECT * FROM
(
SELECT a.name as n
FROM
@ -34,28 +30,35 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \
GROUP BY n
)
LIMIT 20
FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq
FORMAT Null
" 2>&1 | grep -m1 -o "Code: 159"
$CLICKHOUSE_CLIENT -q "system flush logs"
${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'"
### Should stop pulling data and return what has been generated already (return code 0)
timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \
"SELECT a.name as n
FROM
(
SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
) AS a,
(
SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000
) as b
FORMAT Null
SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
"
query_id=$(random_str 12)
$CLICKHOUSE_CLIENT --query_id "$query_id" -q "
SELECT a.name as n
FROM
(
SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
) AS a,
(
SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000
) as b
FORMAT Null
SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
"
echo $?
$CLICKHOUSE_CLIENT -q "system flush logs"
${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'"
# HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor
### Should be cancelled after 1 second and return a 159 exception (timeout)
${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \
"SELECT * FROM
${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d "
SELECT * FROM
(
SELECT a.name as n
FROM
@ -68,12 +71,13 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec
GROUP BY n
)
LIMIT 20
FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq
FORMAT Null
" 2>&1 | grep -o "Code: 159" | sort | uniq
### Should stop pulling data and return what has been generated already (return code 0)
${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \
"SELECT a.name as n
${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL" -d "
SELECT a.name as n
FROM
(
SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
@ -83,5 +87,5 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \
) as b
FORMAT Null
SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
"
"
echo $?

View File

@ -465,6 +465,37 @@ Expression ((Projection + Before ORDER BY))
ReadFromStorage (SystemOne)
-- execute
Float64 9007199254740994
-- presence of an inner OFFSET retains the ORDER BY
-- query
WITH
t1 AS (
SELECT a, b
FROM
VALUES (
'b UInt32, a Int32',
(1, 1),
(2, 0)
)
)
SELECT
SUM(a)
FROM (
SELECT a, b
FROM t1
ORDER BY 1 DESC, 2
OFFSET 1
) t2
-- explain
Expression ((Projection + Before ORDER BY))
Aggregating
Expression (Before GROUP BY)
Offset
Expression (Projection)
Sorting (Sorting for ORDER BY)
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
ReadFromStorage (Values)
-- execute
0
-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function
-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order
-- query

View File

@ -302,6 +302,27 @@ FROM
)"
run_query "$query"
echo "-- presence of an inner OFFSET retains the ORDER BY"
query="WITH
t1 AS (
SELECT a, b
FROM
VALUES (
'b UInt32, a Int32',
(1, 1),
(2, 0)
)
)
SELECT
SUM(a)
FROM (
SELECT a, b
FROM t1
ORDER BY 1 DESC, 2
OFFSET 1
) t2"
run_query "$query"
echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function"
ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION"
echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order"

View File

@ -464,6 +464,36 @@ Expression ((Project names + Projection))
ReadFromStorage (SystemOne)
-- execute
Float64 9007199254740994
-- presence of an inner OFFSET retains the ORDER BY
-- query
WITH
t1 AS (
SELECT a, b
FROM
VALUES (
'b UInt32, a Int32',
(1, 1),
(2, 0)
)
)
SELECT
SUM(a)
FROM (
SELECT a, b
FROM t1
ORDER BY 1 DESC, 2
OFFSET 1
) t2
-- explain
Expression ((Project names + Projection))
Aggregating
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
Offset
Sorting (Sorting for ORDER BY)
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
ReadFromStorage (Values)
-- execute
0
-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function
-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order
-- query

View File

@ -382,6 +382,253 @@ key1 e 5 5 5 key1 C 3 4 5
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 key4 F 1 1 1
SET join_algorithm='hash';
SELECT t1.*, t2.* FROM t1 LEFT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
key1 e 5 5 5 0 0 \N
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 B 2 1 2
key1 d 4 7 2 key1 D 4 1 6
key1 e 5 5 5 0 0 \N
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 0 0 \N
SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 c 3 2 1 key1 D 4 1 6
key1 d 4 7 2 0 0 \N
key1 e 5 5 5 0 0 \N
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 0 0 \N
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 LEFT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 B 2 1 2
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 c 3 2 1 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 LEFT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 e 5 5 5 key1 0 0 \N
key2 a2 1 1 1 key2 0 0 \N
SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 e 5 5 5 key1 0 0 \N
key2 a2 1 1 1 key2 0 0 \N
key4 f 2 3 4 key4 0 0 \N
SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 d 4 7 2 key1 0 0 \N
key1 e 5 5 5 key1 0 0 \N
key2 a2 1 1 1 key2 0 0 \N
key4 f 2 3 4 key4 0 0 \N
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
SELECT t1.*, t2.* FROM t1 RIGHT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
key1 a 1 1 2 key1 A 1 2 1
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key1 A 1 2 1
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 RIGHT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 RIGHT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key1 A 1 2 1
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
SET join_algorithm='grace_hash';
SELECT t1.*, t2.* FROM t1 LEFT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
key1 e 5 5 5 0 0 \N
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 B 2 1 2
key1 d 4 7 2 key1 D 4 1 6
key1 e 5 5 5 0 0 \N
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 0 0 \N
SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 c 3 2 1 key1 D 4 1 6
key1 d 4 7 2 0 0 \N
key1 e 5 5 5 0 0 \N
key2 a2 1 1 1 0 0 \N
key4 f 2 3 4 0 0 \N
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 LEFT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 B 2 1 2
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 c 3 2 1 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 LEFT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 e 5 5 5 key1 0 0 \N
key2 a2 1 1 1 key2 0 0 \N
SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 e 5 5 5 key1 0 0 \N
key2 a2 1 1 1 key2 0 0 \N
key4 f 2 3 4 key4 0 0 \N
SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 d 4 7 2 key1 0 0 \N
key1 e 5 5 5 key1 0 0 \N
key2 a2 1 1 1 key2 0 0 \N
key4 f 2 3 4 key4 0 0 \N
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
SELECT t1.*, t2.* FROM t1 RIGHT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
key1 a 1 1 2 key1 A 1 2 1
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key1 A 1 2 1
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 RIGHT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 a 1 1 2 key1 C 3 4 5
key1 a 1 1 2 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SELECT t1.*, t2.* FROM t1 RIGHT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
0 0 \N key1 A 1 2 1
0 0 \N key3 a3 1 1 1
0 0 \N key4 F 1 1 1
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
SET join_algorithm='hash';
SELECT t1.* FROM t1 LEFT ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
key1 a 1 1 2
key1 b 2 3 2
key1 c 3 2 1
key1 d 4 7 2
key1 e 5 5 5
key2 a2 1 1 1
key4 f 2 3 4
SELECT t1.* FROM t1 LEFT SEMI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
key1 a 1 1 2
key1 b 2 3 2
key1 c 3 2 1
key1 d 4 7 2
key2 a2 1 1 1
key4 f 2 3 4
SELECT t1.* FROM t1 LEFT ANTI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
key1 e 5 5 5
SELECT t1.* FROM t1 RIGHT ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
SELECT t1.* FROM t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
key1 a 1 1 2
SELECT t1.* FROM t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
SET join_algorithm='hash';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 a 1 1 2 key1 B 2 1 2
@ -452,3 +699,46 @@ key2 a2 1 1 1 key1 A 1 2 1
key2 a2 1 1 1 key3 a3 1 1 1
key2 a2 1 1 1 key4 F 1 1 1
key4 f 2 3 4 key1 B 2 1 2
SET join_algorithm='hash';
SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 B 2 1 2
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 c 3 2 1 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SET join_algorithm='grace_hash';
SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 C 3 4 5
key1 d 4 7 2 key1 D 4 1 6
key4 f 2 3 4 key4 F 1 1 1
SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 A 1 2 1
key1 b 2 3 2 key1 B 2 1 2
key1 c 3 2 1 key1 B 2 1 2
key1 d 4 7 2 key1 D 4 1 6
SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
key1 a 1 1 2 key1 B 2 1 2
key1 b 2 3 2 key1 C 3 4 5
key1 c 3 2 1 key1 D 4 1 6
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
1 1 1 1 1 1
SET join_algorithm='hash';
SELECT t1.* FROM t1 INNER ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
key1 a 1 1 2
key1 b 2 3 2
key1 c 3 2 1
key1 d 4 7 2
key2 a2 1 1 1

View File

@ -22,6 +22,26 @@ SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2
{% endfor -%}
{% endfor -%}
{% for algorithm in ['hash', 'grace_hash'] -%}
SET join_algorithm='{{ algorithm }}';
{% for join_type in ['LEFT', 'RIGHT'] -%}
{% for join_strictness in ['ANY', 'SEMI', 'ANTI'] -%}
SELECT t1.*, t2.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_strictness }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
{% endfor -%}
{% endfor -%}
{% endfor -%}
{% for algorithm in ['hash'] -%}
SET join_algorithm='{{ algorithm }}';
{% for join_type in ['LEFT', 'RIGHT'] -%}
{% for join_strictness in ['ANY', 'SEMI', 'ANTI'] -%}
SELECT t1.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
{% endfor -%}
{% endfor -%}
{% endfor -%}
{% for algorithm in ['hash'] -%}
SET join_algorithm='{{ algorithm }}';
@ -29,6 +49,28 @@ SET join_algorithm='{{ algorithm }}';
SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
{% endfor -%}
{% endfor -%}
{% for algorithm in ['hash', 'grace_hash'] -%}
SET join_algorithm='{{ algorithm }}';
{% for join_type in ['INNER'] -%}
{% for join_strictness in ['ANY'] -%}
SELECT t1.*, t2.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_strictness }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0);
{% endfor -%}
{% endfor -%}
{% endfor -%}
{% for algorithm in ['hash'] -%}
SET join_algorithm='{{ algorithm }}';
{% for join_type in ['INNER'] -%}
{% for join_strictness in ['ANY'] -%}
SELECT t1.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL;
{% endfor -%}
{% endfor -%}
{% endfor -%}
-- { echoOff }
-- test error messages

View File

@ -0,0 +1,4 @@
-- Combines a predicate on the partition key column (p = 0) with rowNumberInAllBlocks() in one WHERE clause.
-- Table t is partitioned by p and ordered by x.
CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x;
-- Insert ten rows, all with p = 0.
INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100;
-- Run the same query with the analyzer disabled and then enabled.
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1;

View File

@ -0,0 +1,30 @@
-- { echoOn }
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
[[(1,1),(2,2),(3,3),(1,1)]]
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
MultiLineString
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
MULTILINESTRING((1 1,2 2,3 3,1 1))
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
[[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]]
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
MultiLineString
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
MULTILINESTRING((1 1,2 2,3 3,1 1),(1 0,2 0,3 0))
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x
SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y);
POLYGON((1 1,2 2,3 3,1 1)) String [[(1,1),(2,2),(3,3),(1,1)]] Polygon
-- Non constant tests
DROP TABLE IF EXISTS t;
CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory;
INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))');
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64)))
select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord;
POLYGON((1 1,2 2,3 3,1 1)) [[(1,1),(2,2),(3,3),(1,1)]] 1
POLYGON((1 1,2 2,3 3,1 1),(1 0,2 0,3 0,1 0)) [[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] 1
POLYGON((1 0,2 1,3 0,4 1,5 0,6 1,7 0,8 1,9 0,10 1,1 0)) [[(1,0),(2,1),(3,0),(4,1),(5,0),(6,1),(7,0),(8,1),(9,0),(10,1)]] 1

View File

@ -0,0 +1,26 @@
-- { echoOn }
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));
SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'));
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x
SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y);
-- Non constant tests
DROP TABLE IF EXISTS t;
CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory;
INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))');
INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))');
INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))');
-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString.
-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64)))
select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord;

View File

@ -0,0 +1,6 @@
Expression ((Project names + Projection))
Aggregating
Expression (Before GROUP BY)
ReadFromMerge
Filter (( + ( + )))
ReadFromMergeTree (default.test_03217_merge_replica_1)

View File

@ -0,0 +1,16 @@
-- Checks the query plan of a Merge table when the query filters on the _table virtual column.
-- Two replicas (r1 and r2) are created on the same replication path.
CREATE TABLE test_03217_merge_replica_1(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r1')
ORDER BY x;
CREATE TABLE test_03217_merge_replica_2(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r2')
ORDER BY x;
-- Merge table over tables of the current database selected by the name pattern below
-- (intended to cover both replica tables created above).
CREATE TABLE test_03217_all_replicas (x UInt32)
ENGINE = Merge(currentDatabase(), 'test_03217_merge_replica_*');
-- Insert through the first replica only, then sync the second replica.
INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10);
SYSTEM SYNC REPLICA test_03217_merge_replica_2;
-- If the filter on _table is not applied, then the plan will show both replicas
EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table SETTINGS allow_experimental_analyzer=1;

View File

@ -0,0 +1,6 @@
information_schema tables
both default test_03217_system_tables_replica_1 r1
both default test_03217_system_tables_replica_2 r2
default test_03217_system_tables_replica_1 r1
1
1

Some files were not shown because too many files have changed in this diff Show More