Merge branch 'master' into query_plan_for_merge
Commit e1b2952a60
@@ -66,13 +66,11 @@ TRAP(gethostbyname)
TRAP(gethostbyname2)
TRAP(gethostent)
TRAP(getlogin)
TRAP(getmntent)
TRAP(getnetbyaddr)
TRAP(getnetbyname)
TRAP(getnetent)
TRAP(getnetgrent)
TRAP(getnetgrent_r)
TRAP(getopt)
TRAP(getopt_long)
TRAP(getopt_long_only)
TRAP(getpass)
@@ -133,7 +131,6 @@ TRAP(nrand48)
TRAP(__ppc_get_timebase_freq)
TRAP(ptsname)
TRAP(putchar_unlocked)
TRAP(putenv)
TRAP(pututline)
TRAP(pututxline)
TRAP(putwchar_unlocked)
@@ -148,7 +145,6 @@ TRAP(sethostent)
TRAP(sethostid)
TRAP(setkey)
//TRAP(setlocale) // Used by replxx at startup
TRAP(setlogmask)
TRAP(setnetent)
TRAP(setnetgrent)
TRAP(setprotoent)
@@ -203,7 +199,6 @@ TRAP(lgammal)
TRAP(nftw)
TRAP(nl_langinfo)
TRAP(putc_unlocked)
TRAP(rand)
/** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. However, in modern
  * implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams
  * are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external
@@ -288,4 +283,14 @@ TRAP(tss_get)
TRAP(tss_set)
TRAP(tss_delete)

#ifndef USE_MUSL
/// These produce duplicate symbol errors when statically linking with musl.
/// Maybe we can remove them from the musl fork.
TRAP(getopt)
TRAP(putenv)
TRAP(setlogmask)
TRAP(rand)
TRAP(getmntent)
#endif

#endif
@@ -48,25 +48,17 @@ std::string PathImpl::currentImpl()
std::string PathImpl::homeImpl()
{
    std::string path;
#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE)
    size_t buf_size = 1024; // Same as glibc use for getpwuid
    std::vector<char> buf(buf_size);
    struct passwd res;
    struct passwd* pwd = nullptr;

    getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd);
#else
    struct passwd* pwd = getpwuid(getuid());
#endif
    if (pwd)
        path = pwd->pw_dir;
    else
    {
#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE)
        getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd);
#else
        pwd = getpwuid(geteuid());
#endif
        if (pwd)
            path = pwd->pw_dir;
        else
@@ -18,4 +18,4 @@ target_compile_options (_poco_util
    -Wno-zero-as-null-pointer-constant
)
target_include_directories (_poco_util SYSTEM PUBLIC "include")
target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML)
target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML Poco::Net)
@@ -241,6 +241,20 @@ namespace Util
        /// If the value contains references to other properties (${<property>}), these
        /// are expanded.

    std::string getHost(const std::string & key) const;
        /// Returns the string value of the host property with the given name.
        /// Throws a NotFoundException if the key does not exist.
        /// Throws a SyntaxException if the property is not a valid host (IP address or domain).
        /// If the value contains references to other properties (${<property>}), these
        /// are expanded.

    std::string getHost(const std::string & key, const std::string & defaultValue) const;
        /// If a property with the given key exists, returns the host property's string value,
        /// otherwise returns the given default value.
        /// Throws a SyntaxException if the property is not a valid host (IP address or domain).
        /// If the value contains references to other properties (${<property>}), these
        /// are expanded.

    virtual void setString(const std::string & key, const std::string & value);
        /// Sets the property with the given key to the given value.
        /// An already existing value for the key is overwritten.
@@ -339,12 +353,35 @@ namespace Util
    static bool parseBool(const std::string & value);
    void setRawWithEvent(const std::string & key, std::string value);

    static void checkHostValidity(const std::string & value);
        /// Throws a SyntaxException if the value is not a valid host (IP address or domain).

    virtual ~AbstractConfiguration();

private:
    std::string internalExpand(const std::string & value) const;
    std::string uncheckedExpand(const std::string & value) const;

    static bool isValidIPv4Address(const std::string & value);
        /// IPv4 address considered valid if it is "0.0.0.0" or one of those,
        /// defined by inet_aton() or inet_addr()

    static bool isValidIPv6Address(const std::string & value);
        /// IPv6 address considered valid if it is "::" or one of those,
        /// defined by inet_pton() with AF_INET6 flag
        /// (in this case it may have scope id and may be surrounded by '[', ']')

    static bool isValidDomainName(const std::string & value);
        /// <domain> ::= <subdomain> [ "." ]
        /// <subdomain> ::= <label> | <subdomain> "." <label>
        /// <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
        /// <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
        /// <let-dig-hyp> ::= <let-dig> | "-"
        /// <let-dig> ::= <letter> | <digit>
        /// <letter> ::= any one of the 52 alphabetic characters A through Z in
        /// upper case and a through z in lower case
        /// <digit> ::= any one of the ten digits 0 through 9

    AbstractConfiguration(const AbstractConfiguration &);
    AbstractConfiguration & operator=(const AbstractConfiguration &);

@@ -18,6 +18,7 @@
#include "Poco/NumberParser.h"
#include "Poco/NumberFormatter.h"
#include "Poco/String.h"
#include "Poco/Net/IPAddressImpl.h"


using Poco::Mutex;
@@ -263,6 +264,41 @@ bool AbstractConfiguration::getBool(const std::string& key, bool defaultValue) c
}


std::string AbstractConfiguration::getHost(const std::string& key) const
{
    Mutex::ScopedLock lock(_mutex);

    std::string value;
    if (getRaw(key, value))
    {
        std::string expandedValue = internalExpand(value);
        checkHostValidity(expandedValue);
        return expandedValue;
    }
    else
        throw NotFoundException(key);
}


std::string AbstractConfiguration::getHost(const std::string& key, const std::string& defaultValue) const
{
    Mutex::ScopedLock lock(_mutex);

    std::string value;
    if (getRaw(key, value))
    {
        std::string expandedValue = internalExpand(value);
        checkHostValidity(expandedValue);
        return expandedValue;
    }
    else
    {
        checkHostValidity(defaultValue);
        return defaultValue;
    }
}


void AbstractConfiguration::setString(const std::string& key, const std::string& value)
{
    setRawWithEvent(key, value);
@@ -529,4 +565,68 @@ void AbstractConfiguration::setRawWithEvent(const std::string& key, std::string
}


void AbstractConfiguration::checkHostValidity(const std::string& value)
{
    if (!isValidIPv4Address(value) && !isValidIPv6Address(value) && !isValidDomainName(value))
    {
        throw SyntaxException("Property is not a valid host name", value);
    }
}


bool AbstractConfiguration::isValidIPv4Address(const std::string& value)
{
    using Poco::Net::Impl::IPv4AddressImpl;
    IPv4AddressImpl empty4 = IPv4AddressImpl();

    IPv4AddressImpl ipAddress = IPv4AddressImpl::parse(value);
    return ipAddress != empty4 || value == "0.0.0.0";
}


bool AbstractConfiguration::isValidIPv6Address(const std::string& value)
{
#if defined(POCO_HAVE_IPv6)
    using Poco::Net::Impl::IPv6AddressImpl;
    IPv6AddressImpl empty6 = IPv6AddressImpl();

    IPv6AddressImpl ipAddress = IPv6AddressImpl::parse(value);
    return ipAddress != empty6 || value == "::";
#else
    return false;
#endif
}


bool AbstractConfiguration::isValidDomainName(const std::string& value)
{
    if (value.empty() || value == "." || value.length() > 253)
        return false;
    int labelLength = 0;
    char oldChar = 0;

    for (char ch : value)
    {
        if (ch == '.')
        {
            if (labelLength == 0 || labelLength > 63 || oldChar == '-')
                return false;
            labelLength = 0;
        }
        else if (isalnum(ch) || ch == '-')
        {
            if (labelLength == 0 && (ch == '-' || isdigit(ch)))
                return false;
            ++labelLength;
        }
        else
        {
            return false;
        }
        oldChar = ch;
    }
    return oldChar == '.' || (labelLength > 0 && labelLength <= 63 && oldChar != '-');
}


} } // namespace Poco::Util
@@ -140,6 +140,12 @@ if (CMAKE_CROSSCOMPILING)
    message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}")
endif()

# llvm-project/llvm/cmake/config-ix.cmake does a weird thing: it defines _LARGEFILE64_SOURCE,
# then checks if lseek64() function exists, then undefines _LARGEFILE64_SOURCE.
# Then the actual code that uses this function *doesn't* define _LARGEFILE64_SOURCE, so lseek64()
# may not exist and compilation fails. This happens with musl.
add_compile_definitions("_LARGEFILE64_SOURCE")

add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")

set_directory_properties (PROPERTIES
contrib/sysroot (vendored)
@@ -1 +1 @@
Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8
Subproject commit 5be834147d5b5dd77ca2b821f356982029320513
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.8.2.3"
ARG VERSION="24.8.3.59"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.8.2.3"
ARG VERSION="24.8.3.59"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

@@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.8.2.3"
ARG VERSION="24.8.3.59"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

#docker-official-library:off
docs/changelogs/v24.3.10.33-lts.md (new file, 32 lines)
@@ -0,0 +1,32 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.3.10.33-lts (37b6502ebf0) FIXME as compared to v24.3.9.5-lts (a939270465e)

#### Improvement
* Backported in [#68870](https://github.com/ClickHouse/ClickHouse/issues/68870): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#69095](https://github.com/ClickHouse/ClickHouse/issues/69095): Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68995](https://github.com/ClickHouse/ClickHouse/issues/68995): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Backported in [#68844](https://github.com/ClickHouse/ClickHouse/issues/68844): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#68881](https://github.com/ClickHouse/ClickHouse/issues/68881): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Backported in [#69054](https://github.com/ClickHouse/ClickHouse/issues/69054): Added back virtual columns ` _table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68856](https://github.com/ClickHouse/ClickHouse/issues/68856): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69152](https://github.com/ClickHouse/ClickHouse/issues/69152): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#69112](https://github.com/ClickHouse/ClickHouse/issues/69112): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).

#### NO CL CATEGORY

* Backported in [#68938](https://github.com/ClickHouse/ClickHouse/issues/68938):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Backported in [#68826](https://github.com/ClickHouse/ClickHouse/issues/68826): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Backported in [#68754](https://github.com/ClickHouse/ClickHouse/issues/68754): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
* Backported in [#69044](https://github.com/ClickHouse/ClickHouse/issues/69044): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).

docs/changelogs/v24.5.7.31-stable.md (new file, 29 lines)
@@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.5.7.31-stable (6c185e9aec1) FIXME as compared to v24.5.6.45-stable (bdca8604c29)

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68564](https://github.com/ClickHouse/ClickHouse/issues/68564): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68996](https://github.com/ClickHouse/ClickHouse/issues/68996): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Backported in [#68865](https://github.com/ClickHouse/ClickHouse/issues/68865): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#69004](https://github.com/ClickHouse/ClickHouse/issues/69004): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` still can make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` to work as before (no Nullable columns will be inferred) and introduces new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68882](https://github.com/ClickHouse/ClickHouse/issues/68882): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Backported in [#69023](https://github.com/ClickHouse/ClickHouse/issues/69023): Added back virtual columns ` _table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68858](https://github.com/ClickHouse/ClickHouse/issues/68858): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68784](https://github.com/ClickHouse/ClickHouse/issues/68784): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#69154](https://github.com/ClickHouse/ClickHouse/issues/69154): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).

#### NO CL CATEGORY

* Backported in [#68940](https://github.com/ClickHouse/ClickHouse/issues/68940):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Backported in [#68828](https://github.com/ClickHouse/ClickHouse/issues/68828): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Backported in [#69046](https://github.com/ClickHouse/ClickHouse/issues/69046): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).

docs/changelogs/v24.6.5.30-stable.md (new file, 29 lines)
@@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.6.5.30-stable (e6e196c92d6) FIXME as compared to v24.6.4.42-stable (c534bb4b4dd)

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68969](https://github.com/ClickHouse/ClickHouse/issues/68969): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Backported in [#68814](https://github.com/ClickHouse/ClickHouse/issues/68814): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#69005](https://github.com/ClickHouse/ClickHouse/issues/69005): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` still can make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` to work as before (no Nullable columns will be inferred) and introduces new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68883](https://github.com/ClickHouse/ClickHouse/issues/68883): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Backported in [#69025](https://github.com/ClickHouse/ClickHouse/issues/69025): Added back virtual columns ` _table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68860](https://github.com/ClickHouse/ClickHouse/issues/68860): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68786](https://github.com/ClickHouse/ClickHouse/issues/68786): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#69156](https://github.com/ClickHouse/ClickHouse/issues/69156): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#69116](https://github.com/ClickHouse/ClickHouse/issues/69116): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).

#### NO CL CATEGORY

* Backported in [#68942](https://github.com/ClickHouse/ClickHouse/issues/68942):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Backported in [#68830](https://github.com/ClickHouse/ClickHouse/issues/68830): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Backported in [#69048](https://github.com/ClickHouse/ClickHouse/issues/69048): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).

docs/changelogs/v24.8.3.59-lts.md (new file, 50 lines)
@@ -0,0 +1,50 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.8.3.59-lts (e729b9fa40e) FIXME as compared to v24.8.2.3-lts (b54f79ed323)

#### New Feature
* Backported in [#68710](https://github.com/ClickHouse/ClickHouse/issues/68710): Query cache entries can now be dropped by tag. For example, the query cache entry created by `SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'` can now be dropped by `SYSTEM DROP QUERY CACHE TAG 'abc'` (or of course just: `SYSTEM DROP QUERY CACHE` which will clear the entire query cache). [#68477](https://github.com/ClickHouse/ClickHouse/pull/68477) ([Michał Tabaszewski](https://github.com/pinsvin00)).

#### Improvement
* Backported in [#69097](https://github.com/ClickHouse/ClickHouse/issues/69097): Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68973](https://github.com/ClickHouse/ClickHouse/issues/68973): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Backported in [#68818](https://github.com/ClickHouse/ClickHouse/issues/68818): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#68893](https://github.com/ClickHouse/ClickHouse/issues/68893): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` still can make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` to work as before (no Nullable columns will be inferred) and introduces new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68721](https://github.com/ClickHouse/ClickHouse/issues/68721): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Backported in [#69029](https://github.com/ClickHouse/ClickHouse/issues/69029): Added back virtual columns ` _table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68864](https://github.com/ClickHouse/ClickHouse/issues/68864): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68854](https://github.com/ClickHouse/ClickHouse/issues/68854): Fix possible error `DB::Exception: Block structure mismatch in joined block stream: different columns:` with new JSON column. [#68686](https://github.com/ClickHouse/ClickHouse/pull/68686) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68790](https://github.com/ClickHouse/ClickHouse/issues/68790): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#69108](https://github.com/ClickHouse/ClickHouse/issues/69108): TODO. [#68744](https://github.com/ClickHouse/ClickHouse/pull/68744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68850](https://github.com/ClickHouse/ClickHouse/issues/68850): Fix resolving dynamic subcolumns from subqueries in analyzer. [#68824](https://github.com/ClickHouse/ClickHouse/pull/68824) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68911](https://github.com/ClickHouse/ClickHouse/issues/68911): Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#69160](https://github.com/ClickHouse/ClickHouse/issues/69160): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#69072](https://github.com/ClickHouse/ClickHouse/issues/69072): Fixed writing to Materialized Views with enabled setting `optimize_functions_to_subcolumns`. [#68951](https://github.com/ClickHouse/ClickHouse/pull/68951) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#69016](https://github.com/ClickHouse/ClickHouse/issues/69016): Don't use serializations cache in const Dynamic column methods. It could lead to a use-of-uninitialized value or even a race condition during aggregations. [#68953](https://github.com/ClickHouse/ClickHouse/pull/68953) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69120](https://github.com/ClickHouse/ClickHouse/issues/69120): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).

#### NO CL CATEGORY

* Backported in [#68947](https://github.com/ClickHouse/ClickHouse/issues/68947):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Backported in [#68704](https://github.com/ClickHouse/ClickHouse/issues/68704): Fix enumerating dynamic subcolumns. [#68582](https://github.com/ClickHouse/ClickHouse/pull/68582) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69000](https://github.com/ClickHouse/ClickHouse/issues/69000): Prioritizing of virtual columns in hive partitioning. [#68606](https://github.com/ClickHouse/ClickHouse/pull/68606) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Backported in [#68799](https://github.com/ClickHouse/ClickHouse/issues/68799): CI: Disable SQLLogic job. [#68654](https://github.com/ClickHouse/ClickHouse/pull/68654) ([Max K.](https://github.com/maxknv)).
* Backported in [#68834](https://github.com/ClickHouse/ClickHouse/issues/68834): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Backported in [#68781](https://github.com/ClickHouse/ClickHouse/issues/68781): Fix flaky test 00989_parallel_parts_loading. [#68737](https://github.com/ClickHouse/ClickHouse/pull/68737) ([alesapin](https://github.com/alesapin)).
* Backported in [#68762](https://github.com/ClickHouse/ClickHouse/issues/68762): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
* Backported in [#68810](https://github.com/ClickHouse/ClickHouse/issues/68810): Try to disable rerun check if job triggered manually. [#68751](https://github.com/ClickHouse/ClickHouse/pull/68751) ([Max K.](https://github.com/maxknv)).
* Backported in [#68962](https://github.com/ClickHouse/ClickHouse/issues/68962): Fix 2477 timeout. [#68752](https://github.com/ClickHouse/ClickHouse/pull/68752) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#68977](https://github.com/ClickHouse/ClickHouse/issues/68977): Check setting use_json_alias_for_old_object_type in runtime. [#68793](https://github.com/ClickHouse/ClickHouse/pull/68793) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68852](https://github.com/ClickHouse/ClickHouse/issues/68852): Make dynamic structure selection more consistent. [#68802](https://github.com/ClickHouse/ClickHouse/pull/68802) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#69052](https://github.com/ClickHouse/ClickHouse/issues/69052): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).

@@ -111,13 +111,14 @@ ANN indexes are built during column insertion and merge. As a result, `INSERT` a
tables. ANN indexes are ideally used only with immutable or rarely changed data, that is, when there are far more read requests than write
requests.

ANN indexes support these queries:
ANN indexes support this type of query:

``` sql
WITH [...] AS reference_vector
SELECT *
FROM table
[WHERE ...]
ORDER BY Distance(vectors, Point)
WHERE ... -- WHERE clause is optional
ORDER BY Distance(vectors, reference_vector)
LIMIT N
```

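As an illustrative sketch of this query shape (assuming a hypothetical table `img` with an `Array(Float32)` column `vectors` and using the built-in `L2Distance` function as the distance metric), such a query could look like:

``` sql
WITH [0.1, 0.2, 0.3, 0.4] AS reference_vector
SELECT id
FROM img
ORDER BY L2Distance(vectors, reference_vector)
LIMIT 10
```
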
@@ -3226,7 +3226,7 @@ Default value: `0`.

## lightweight_deletes_sync {#lightweight_deletes_sync}

The same as 'mutation_sync', but controls only execution of lightweight deletes.
The same as [`mutations_sync`](#mutations_sync), but controls only execution of lightweight deletes.

Possible values:

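A hedged usage sketch (assuming a hypothetical table `hits`, and that the setting accepts the same values as `mutations_sync`, where `2` waits for all replicas):

``` sql
SET lightweight_deletes_sync = 2;        -- wait until the delete is processed on all replicas
DELETE FROM hits WHERE CounterID = 42;
```
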
@@ -499,7 +499,7 @@ Required parameters:
- `type` — `encrypted`. Otherwise the encrypted disk is not created.
- `disk` — Type of disk for data storage.
- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encode the key in hexadecimal form.
    You can specify multiple keys using the `id` attribute (see example above).
    You can specify multiple keys using the `id` attribute (see example below).

Optional parameters:

@@ -0,0 +1,44 @@
---
slug: /en/sql-reference/aggregate-functions/reference/distinctdynamictypes
sidebar_position: 215
---

# distinctDynamicTypes

Calculates the list of distinct data types stored in a [Dynamic](../../data-types/dynamic.md) column.

**Syntax**

```sql
distinctDynamicTypes(dynamic)
```

**Arguments**

- `dynamic` — [Dynamic](../../data-types/dynamic.md) column.

**Returned Value**

- The sorted list of data type names [Array(String)](../../data-types/array.md).

**Example**

Query:

```sql
DROP TABLE IF EXISTS test_dynamic;
CREATE TABLE test_dynamic(d Dynamic) ENGINE = Memory;
INSERT INTO test_dynamic VALUES (42), (NULL), ('Hello'), ([1, 2, 3]), ('2020-01-01'), (map(1, 2)), (43), ([4, 5]), (NULL), ('World'), (map(3, 4))
```

```sql
SELECT distinctDynamicTypes(d) FROM test_dynamic;
```

Result:

```reference
┌─distinctDynamicTypes(d)──────────────────────────────────────┐
│ ['Array(Int64)','Date','Int64','Map(UInt8, UInt8)','String'] │
└──────────────────────────────────────────────────────────────┘
```
@@ -0,0 +1,125 @@
---
slug: /en/sql-reference/aggregate-functions/reference/distinctjsonpaths
sidebar_position: 216
---

# distinctJSONPaths

Calculates the list of distinct paths stored in a [JSON](../../data-types/newjson.md) column.

**Syntax**

```sql
distinctJSONPaths(json)
```

**Arguments**

- `json` — [JSON](../../data-types/newjson.md) column.

**Returned Value**

- The sorted list of paths [Array(String)](../../data-types/array.md).

**Example**

Query:

```sql
DROP TABLE IF EXISTS test_json;
CREATE TABLE test_json(json JSON) ENGINE = Memory;
INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}')
```

```sql
SELECT distinctJSONPaths(json) FROM test_json;
```

Result:

```reference
┌─distinctJSONPaths(json)───┐
│ ['a','b','c.d.e','c.d.f'] │
└───────────────────────────┘
```

# distinctJSONPathsAndTypes

Calculates the list of distinct paths and their types stored in a [JSON](../../data-types/newjson.md) column.

**Syntax**

```sql
distinctJSONPathsAndTypes(json)
```

**Arguments**

- `json` — [JSON](../../data-types/newjson.md) column.

**Returned Value**

- The sorted map of paths and types [Map(String, Array(String))](../../data-types/map.md).

**Example**

Query:

```sql
DROP TABLE IF EXISTS test_json;
CREATE TABLE test_json(json JSON) ENGINE = Memory;
INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}')
```

```sql
SELECT distinctJSONPathsAndTypes(json) FROM test_json;
```

Result:

```reference
┌─distinctJSONPathsAndTypes(json)───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ {'a':['Int64'],'b':['Array(Nullable(Int64))','String'],'c.d.e':['Date'],'c.d.f':['Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))']} │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

**Note**

If the JSON declaration contains paths with specified types, these paths will always be included in the result of the `distinctJSONPaths/distinctJSONPathsAndTypes` functions, even if the input data did not have values for these paths.

```sql
DROP TABLE IF EXISTS test_json;
CREATE TABLE test_json(json JSON(a UInt32)) ENGINE = Memory;
INSERT INTO test_json VALUES ('{"b" : "Hello"}'), ('{"b" : "World", "c" : [1, 2, 3]}');
```

```sql
SELECT json FROM test_json;
```

```text
┌─json──────────────────────────────────┐
│ {"a":0,"b":"Hello"}                   │
│ {"a":0,"b":"World","c":["1","2","3"]} │
└───────────────────────────────────────┘
```

```sql
SELECT distinctJSONPaths(json) FROM test_json;
```

```text
┌─distinctJSONPaths(json)─┐
│ ['a','b','c']           │
└─────────────────────────┘
```

```sql
SELECT distinctJSONPathsAndTypes(json) FROM test_json;
```

```text
┌─distinctJSONPathsAndTypes(json)────────────────────────────────┐
│ {'a':['UInt32'],'b':['String'],'c':['Array(Nullable(Int64))']} │
└────────────────────────────────────────────────────────────────┘
```
@@ -505,7 +505,130 @@ As we can see, ClickHouse kept the most frequent paths `a`, `b` and `c` and move

## Introspection functions

There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes).
There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes), [distinctDynamicTypes](../aggregate-functions/reference/distinctdynamictypes.md), [distinctJSONPaths and distinctJSONPathsAndTypes](../aggregate-functions/reference/distinctjsonpaths.md)

**Examples**

Let's investigate the content of the [GH Archive](https://www.gharchive.org/) dataset for the date `2020-01-01`:

```sql
SELECT arrayJoin(distinctJSONPaths(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject)
```

```text
┌─arrayJoin(distinctJSONPaths(json))─────────────────────────┐
│ actor.avatar_url                                           │
│ actor.display_login                                        │
│ actor.gravatar_id                                          │
│ actor.id                                                   │
│ actor.login                                                │
│ actor.url                                                  │
│ created_at                                                 │
│ id                                                         │
│ org.avatar_url                                             │
│ org.gravatar_id                                            │
│ org.id                                                     │
│ org.login                                                  │
│ org.url                                                    │
│ payload.action                                             │
│ payload.before                                             │
│ payload.comment._links.html.href                           │
│ payload.comment._links.pull_request.href                   │
│ payload.comment._links.self.href                           │
│ payload.comment.author_association                         │
│ payload.comment.body                                       │
│ payload.comment.commit_id                                  │
│ payload.comment.created_at                                 │
│ payload.comment.diff_hunk                                  │
│ payload.comment.html_url                                   │
│ payload.comment.id                                         │
│ payload.comment.in_reply_to_id                             │
│ payload.comment.issue_url                                  │
│ payload.comment.line                                       │
│ payload.comment.node_id                                    │
│ payload.comment.original_commit_id                         │
│ payload.comment.original_position                          │
│ payload.comment.path                                       │
│ payload.comment.position                                   │
│ payload.comment.pull_request_review_id                     │
...
│ payload.release.node_id                                    │
│ payload.release.prerelease                                 │
│ payload.release.published_at                               │
│ payload.release.tag_name                                   │
│ payload.release.tarball_url                                │
│ payload.release.target_commitish                           │
│ payload.release.upload_url                                 │
│ payload.release.url                                        │
│ payload.release.zipball_url                                │
│ payload.size                                               │
│ public                                                     │
│ repo.id                                                    │
│ repo.name                                                  │
│ repo.url                                                   │
│ type                                                       │
└─arrayJoin(distinctJSONPaths(json))─────────────────────────┘
```

```sql
SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) SETTINGS date_time_input_format='best_effort'
```

```text
┌─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┐
│ ('actor.avatar_url',['String'])                             │
│ ('actor.display_login',['String'])                          │
│ ('actor.gravatar_id',['String'])                            │
│ ('actor.id',['Int64'])                                      │
│ ('actor.login',['String'])                                  │
│ ('actor.url',['String'])                                    │
│ ('created_at',['DateTime'])                                 │
│ ('id',['String'])                                           │
│ ('org.avatar_url',['String'])                               │
│ ('org.gravatar_id',['String'])                              │
│ ('org.id',['Int64'])                                        │
│ ('org.login',['String'])                                    │
│ ('org.url',['String'])                                      │
│ ('payload.action',['String'])                               │
│ ('payload.before',['String'])                               │
│ ('payload.comment._links.html.href',['String'])             │
│ ('payload.comment._links.pull_request.href',['String'])     │
│ ('payload.comment._links.self.href',['String'])             │
│ ('payload.comment.author_association',['String'])           │
│ ('payload.comment.body',['String'])                         │
│ ('payload.comment.commit_id',['String'])                    │
│ ('payload.comment.created_at',['DateTime'])                 │
│ ('payload.comment.diff_hunk',['String'])                    │
│ ('payload.comment.html_url',['String'])                     │
│ ('payload.comment.id',['Int64'])                            │
│ ('payload.comment.in_reply_to_id',['Int64'])                │
│ ('payload.comment.issue_url',['String'])                    │
│ ('payload.comment.line',['Int64'])                          │
│ ('payload.comment.node_id',['String'])                      │
│ ('payload.comment.original_commit_id',['String'])           │
│ ('payload.comment.original_position',['Int64'])             │
│ ('payload.comment.path',['String'])                         │
│ ('payload.comment.position',['Int64'])                      │
│ ('payload.comment.pull_request_review_id',['Int64'])        │
...
│ ('payload.release.node_id',['String'])                      │
│ ('payload.release.prerelease',['Bool'])                     │
│ ('payload.release.published_at',['DateTime'])               │
│ ('payload.release.tag_name',['String'])                     │
│ ('payload.release.tarball_url',['String'])                  │
│ ('payload.release.target_commitish',['String'])             │
│ ('payload.release.upload_url',['String'])                   │
│ ('payload.release.url',['String'])                          │
│ ('payload.release.zipball_url',['String'])                  │
│ ('payload.size',['Int64'])                                  │
│ ('public',['Bool'])                                         │
│ ('repo.id',['Int64'])                                       │
│ ('repo.name',['String'])                                    │
│ ('repo.url',['String'])                                     │
│ ('type',['String'])                                         │
└─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┘
```

## Tips for better usage of the JSON type

@@ -2035,6 +2035,7 @@ Query:
SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]);
```

Result:

``` text
@@ -2043,6 +2044,43 @@ Result:
└──────────────────────────────────────┘
```

## arrayZipUnaligned

Combines multiple arrays into a single array, allowing for unaligned arrays. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments.

**Syntax**

``` sql
arrayZipUnaligned(arr1, arr2, ..., arrN)
```

**Arguments**

- `arrN` — [Array](../data-types/array.md).

The function can take any number of arrays of different types.

**Returned value**

- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../data-types/array.md). If the arrays have different sizes, the shorter arrays will be padded with `null` values.

**Example**

Query:

``` sql
SELECT arrayZipUnaligned(['a'], [1, 2, 3]);
```

Result:

``` text
┌─arrayZipUnaligned(['a'], [1, 2, 3])─┐
│ [('a',1),(NULL,2),(NULL,3)]         │
└─────────────────────────────────────┘
```

## arrayAUC

Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>).

@@ -8,14 +8,14 @@ slug: /en/guides/developer/transactional
This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
- Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen. Clients inside of another transaction have [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation), while clients outside of a transaction have [read uncommitted](https://en.wikipedia.org/wiki/Isolation_(database_systems)#Read_uncommitted) isolation level.
- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting); see the sketch after this list.
- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views).

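The durability-related settings can be exercised directly on an INSERT. The following is a hedged sketch: it assumes a hypothetical replicated table `events`, that `fsync_after_insert` is enabled at the table level (it is assumed here to be a MergeTree-level setting), and passes `insert_quorum` per query:

```sql
-- Wait until the inserted block is acknowledged by at least 2 replicas before replying to the client.
INSERT INTO events SETTINGS insert_quorum = 2
VALUES (1, 'payload');
```
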
## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family

Same as Case 1 above, with this detail:
- If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own
- If table has many partitions and INSERT covers many partitions, then insertion into every partition is transactional on its own


## Case 3: INSERT into one distributed table of the MergeTree* family
@@ -38,7 +38,7 @@ Same as Case 1 above, with this detail:
- the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
- ClickHouse is using MVCC with snapshot isolation internally
- ClickHouse is using [MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control) with [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation) internally for concurrent transactions
- all ACID properties are valid even in the case of server kill/crash
- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
@@ -260,7 +260,7 @@ FROM mergetree_table
### Transactions introspection

You can inspect transactions by querying the `system.transactions` table, but note that you cannot query that
table from a session that is in a transaction–open a second `clickhouse client` session to query that table.
table from a session that is in a transaction. Open a second `clickhouse client` session to query that table.

```sql
SELECT *

161
src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp
Normal file
161
src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp
Normal file
@ -0,0 +1,161 @@
|
||||
#include <unordered_set>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesBinaryEncoding.h>
|
||||
#include <Columns/ColumnDynamic.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
struct AggregateFunctionDistinctDynamicTypesData
|
||||
{
|
||||
constexpr static size_t MAX_ARRAY_SIZE = 0xFFFFFF;
|
||||
|
||||
std::unordered_set<String> data;
|
||||
|
||||
void add(const String & type)
|
||||
{
|
||||
data.insert(type);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionDistinctDynamicTypesData & other)
|
||||
{
|
||||
data.insert(other.data.begin(), other.data.end());
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeVarUInt(data.size(), buf);
|
||||
for (const auto & type : data)
|
||||
writeStringBinary(type, buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
size_t size;
|
||||
readVarUInt(size, buf);
|
||||
if (size > MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", MAX_ARRAY_SIZE, size);
|
||||
|
||||
data.reserve(size);
|
||||
String type;
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
{
|
||||
readStringBinary(type, buf);
|
||||
data.insert(type);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(IColumn & column)
|
||||
{
|
||||
/// Insert types in sorted order for better output.
|
||||
auto & array_column = assert_cast<ColumnArray &>(column);
|
||||
auto & string_column = assert_cast<ColumnString &>(array_column.getData());
|
||||
std::vector<String> sorted_data(data.begin(), data.end());
|
||||
std::sort(sorted_data.begin(), sorted_data.end());
|
||||
for (const auto & type : sorted_data)
|
||||
string_column.insertData(type.data(), type.size());
|
||||
array_column.getOffsets().push_back(string_column.size());
|
||||
}
|
||||
};
|
||||
|
||||
/// Calculates the list of distinct data types in Dynamic column.
|
||||
class AggregateFunctionDistinctDynamicTypes final : public IAggregateFunctionDataHelper<AggregateFunctionDistinctDynamicTypesData, AggregateFunctionDistinctDynamicTypes>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionDistinctDynamicTypes(const DataTypes & argument_types_)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionDistinctDynamicTypesData, AggregateFunctionDistinctDynamicTypes>(argument_types_, {}, std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()))
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return "distinctDynamicTypes"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
const auto & dynamic_column = assert_cast<const ColumnDynamic & >(*columns[0]);
|
||||
if (dynamic_column.isNullAt(row_num))
|
||||
return;
|
||||
|
||||
data(place).add(dynamic_column.getTypeNameAt(row_num));
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE addBatchSinglePlace(
|
||||
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
|
||||
const override
|
||||
{
|
||||
if (if_argument_pos >= 0 || row_begin != 0 || row_end != columns[0]->size())
|
||||
IAggregateFunctionDataHelper::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
|
||||
/// Optimization for case when we add all rows from the column into single place.
|
||||
/// In this case we can avoid iterating over all rows because we can get all types
|
||||
/// in Dynamic column in a more efficient way.
|
||||
else
|
||||
assert_cast<const ColumnDynamic & >(*columns[0]).getAllTypeNamesInto(data(place).data);
|
||||
}
|
||||
|
||||
void addManyDefaults(
|
||||
AggregateDataPtr __restrict /*place*/,
|
||||
const IColumn ** /*columns*/,
|
||||
size_t /*length*/,
|
||||
Arena * /*arena*/) const override
|
||||
{
|
||||
/// Default value for Dynamic is NULL, so nothing to add.
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
data(place).merge(data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
data(place).insertResultInto(to);
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionDistinctDynamicTypes(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
if (argument_types.size() != 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Incorrect number of arguments for aggregate function {}. Expected single argument with type Dynamic, got {} arguments", name, argument_types.size());
|
||||
|
||||
if (!isDynamic(argument_types[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type Dynamic", argument_types[0]->getName(), name);
|
||||
|
||||
return std::make_shared<AggregateFunctionDistinctDynamicTypes>(argument_types);
|
||||
}
|
||||
|
||||
void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("distinctDynamicTypes", createAggregateFunctionDistinctDynamicTypes);
|
||||
}
|
||||
|
||||
}
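
For context, a minimal usage sketch of the `distinctDynamicTypes` function registered above. The table and column names are illustrative, and the exact type names in the result depend on how the inserted literals are mapped to Dynamic variants:

```sql
-- Dynamic is an experimental type, so it has to be enabled first.
SET allow_experimental_dynamic_type = 1;

CREATE TABLE dyn_demo (d Dynamic) ENGINE = Memory;
INSERT INTO dyn_demo VALUES (42), ('hello'), ([1, 2, 3]), (NULL);

-- Returns a sorted array of the distinct type names stored in the column;
-- NULL rows contribute nothing to the result.
SELECT distinctDynamicTypes(d) FROM dyn_demo;
```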
|
src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp (new file, 350 lines)
@ -0,0 +1,350 @@
|
||||
#include <unordered_set>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeObject.h>
|
||||
#include <DataTypes/DataTypesBinaryEncoding.h>
|
||||
#include <Columns/ColumnDynamic.h>
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
constexpr static size_t DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE = 0xFFFFFF;
|
||||
|
||||
|
||||
struct AggregateFunctionDistinctJSONPathsData
|
||||
{
|
||||
static constexpr auto name = "distinctJSONPaths";
|
||||
|
||||
std::unordered_set<String> data;
|
||||
|
||||
void add(const ColumnObject & column, size_t row_num, const std::unordered_map<String, String> &)
|
||||
{
|
||||
for (const auto & [path, _] : column.getTypedPaths())
|
||||
data.insert(path);
|
||||
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
|
||||
{
|
||||
/// Add path from dynamic paths only if it's not NULL in this row.
|
||||
if (!dynamic_column->isNullAt(row_num))
|
||||
data.insert(path);
|
||||
}
|
||||
|
||||
/// Iterate over paths in shared data in this row.
|
||||
const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues();
|
||||
const auto & shared_data_offsets = column.getSharedDataOffsets();
|
||||
const size_t start = shared_data_offsets[static_cast<ssize_t>(row_num) - 1];
|
||||
const size_t end = shared_data_offsets[static_cast<ssize_t>(row_num)];
|
||||
for (size_t i = start; i != end; ++i)
|
||||
data.insert(shared_data_paths->getDataAt(i).toString());
|
||||
}
|
||||
|
||||
void addWholeColumn(const ColumnObject & column, const std::unordered_map<String, String> &)
|
||||
{
|
||||
for (const auto & [path, _] : column.getTypedPaths())
|
||||
data.insert(path);
|
||||
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
|
||||
{
|
||||
/// Add dynamic path only if it has at least one non-null value.
|
||||
/// getNumberOfDefaultRows for Dynamic column is O(1).
|
||||
if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size())
|
||||
data.insert(path);
|
||||
}
|
||||
|
||||
/// Iterate over all paths in shared data.
|
||||
const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues();
|
||||
for (size_t i = 0; i != shared_data_paths->size(); ++i)
|
||||
data.insert(shared_data_paths->getDataAt(i).toString());
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionDistinctJSONPathsData & other)
|
||||
{
|
||||
data.insert(other.data.begin(), other.data.end());
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeVarUInt(data.size(), buf);
|
||||
for (const auto & path : data)
|
||||
writeStringBinary(path, buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
size_t size;
|
||||
readVarUInt(size, buf);
|
||||
if (size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, size);
|
||||
|
||||
String path;
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
{
|
||||
readStringBinary(path, buf);
|
||||
data.insert(path);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(IColumn & column)
|
||||
{
|
||||
/// Insert paths in sorted order for better output.
|
||||
auto & array_column = assert_cast<ColumnArray &>(column);
|
||||
auto & string_column = assert_cast<ColumnString &>(array_column.getData());
|
||||
std::vector<String> sorted_data(data.begin(), data.end());
|
||||
std::sort(sorted_data.begin(), sorted_data.end());
|
||||
for (const auto & path : sorted_data)
|
||||
string_column.insertData(path.data(), path.size());
|
||||
array_column.getOffsets().push_back(string_column.size());
|
||||
}
|
||||
|
||||
static DataTypePtr getResultType()
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
|
||||
}
|
||||
};
|
||||
|
||||
struct AggregateFunctionDistinctJSONPathsAndTypesData
|
||||
{
|
||||
static constexpr auto name = "distinctJSONPathsAndTypes";
|
||||
|
||||
std::unordered_map<String, std::unordered_set<String>> data;
|
||||
|
||||
void add(const ColumnObject & column, size_t row_num, const std::unordered_map<String, String> & typed_paths_type_names)
|
||||
{
|
||||
for (const auto & [path, _] : column.getTypedPaths())
|
||||
data[path].insert(typed_paths_type_names.at(path));
|
||||
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
|
||||
{
|
||||
if (!dynamic_column->isNullAt(row_num))
|
||||
data[path].insert(dynamic_column->getTypeNameAt(row_num));
|
||||
}
|
||||
|
||||
/// Iterate over paths in shared data in this row and decode the data types.
|
||||
const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues();
|
||||
const auto & shared_data_offsets = column.getSharedDataOffsets();
|
||||
const size_t start = shared_data_offsets[static_cast<ssize_t>(row_num) - 1];
|
||||
const size_t end = shared_data_offsets[static_cast<ssize_t>(row_num)];
|
||||
for (size_t i = start; i != end; ++i)
|
||||
{
|
||||
auto path = shared_data_paths->getDataAt(i).toString();
|
||||
auto value = shared_data_values->getDataAt(i);
|
||||
ReadBufferFromMemory buf(value.data, value.size);
|
||||
auto type = decodeDataType(buf);
|
||||
/// We should not have Nulls here but let's check just in case.
|
||||
chassert(!isNothing(type));
|
||||
data[path].insert(type->getName());
|
||||
}
|
||||
}
|
||||
|
||||
void addWholeColumn(const ColumnObject & column, const std::unordered_map<String, String> & typed_paths_type_names)
|
||||
{
|
||||
for (const auto & [path, _] : column.getTypedPaths())
|
||||
data[path].insert(typed_paths_type_names.at(path));
|
||||
for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
|
||||
{
|
||||
/// Add dynamic path only if it has at least one non-null value.
|
||||
/// getNumberOfDefaultRows for Dynamic column is O(1).
|
||||
if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size())
|
||||
dynamic_column->getAllTypeNamesInto(data[path]);
|
||||
}
|
||||
|
||||
/// Iterate over all paths in shared data and decode the data types.
|
||||
const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues();
|
||||
for (size_t i = 0; i != shared_data_paths->size(); ++i)
|
||||
{
|
||||
auto path = shared_data_paths->getDataAt(i).toString();
|
||||
auto value = shared_data_values->getDataAt(i);
|
||||
ReadBufferFromMemory buf(value.data, value.size);
|
||||
auto type = decodeDataType(buf);
|
||||
/// We should not have Nulls here but let's check just in case.
|
||||
chassert(!isNothing(type));
|
||||
data[path].insert(type->getName());
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionDistinctJSONPathsAndTypesData & other)
|
||||
{
|
||||
for (const auto & [path, types] : other.data)
|
||||
data[path].insert(types.begin(), types.end());
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeVarUInt(data.size(), buf);
|
||||
for (const auto & [path, types] : data)
|
||||
{
|
||||
writeStringBinary(path, buf);
|
||||
writeVarUInt(types.size(), buf);
|
||||
for (const auto & type : types)
|
||||
writeStringBinary(type, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
size_t paths_size, types_size;
|
||||
readVarUInt(paths_size, buf);
|
||||
if (paths_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for paths (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, paths_size);
|
||||
|
||||
data.reserve(paths_size);
|
||||
String path, type;
|
||||
for (size_t i = 0; i != paths_size; ++i)
|
||||
{
|
||||
readStringBinary(path, buf);
|
||||
readVarUInt(types_size, buf);
|
||||
if (types_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for types (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, types_size);
|
||||
|
||||
data[path].reserve(types_size);
|
||||
for (size_t j = 0; j != types_size; ++j)
|
||||
{
|
||||
readStringBinary(type, buf);
|
||||
data[path].insert(type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(IColumn & column)
|
||||
{
|
||||
/// Insert sorted paths and types for better output.
|
||||
auto & array_column = assert_cast<ColumnMap &>(column).getNestedColumn();
|
||||
auto & tuple_column = assert_cast<ColumnTuple &>(array_column.getData());
|
||||
auto & key_column = assert_cast<ColumnString &>(tuple_column.getColumn(0));
|
||||
auto & value_column = assert_cast<ColumnArray &>(tuple_column.getColumn(1));
|
||||
auto & value_column_data = assert_cast<ColumnString &>(value_column.getData());
|
||||
std::vector<std::pair<String, std::vector<String>>> sorted_data;
|
||||
sorted_data.reserve(data.size());
|
||||
for (const auto & [path, types] : data)
|
||||
{
|
||||
std::vector<String> sorted_types(types.begin(), types.end());
|
||||
std::sort(sorted_types.begin(), sorted_types.end());
|
||||
sorted_data.emplace_back(path, std::move(sorted_types));
|
||||
}
|
||||
std::sort(sorted_data.begin(), sorted_data.end());
|
||||
|
||||
for (const auto & [path, types] : sorted_data)
|
||||
{
|
||||
key_column.insertData(path.data(), path.size());
|
||||
for (const auto & type : types)
|
||||
value_column_data.insertData(type.data(), type.size());
|
||||
value_column.getOffsets().push_back(value_column_data.size());
|
||||
}
|
||||
|
||||
array_column.getOffsets().push_back(key_column.size());
|
||||
}
|
||||
|
||||
static DataTypePtr getResultType()
|
||||
{
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
|
||||
}
|
||||
};
|
||||
|
||||
/// Calculates the list of distinct paths or pairs (path, type) in JSON column.
|
||||
template <typename Data>
|
||||
class AggregateFunctionDistinctJSONPathsAndTypes final : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionDistinctJSONPathsAndTypes(const DataTypes & argument_types_)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>(
|
||||
argument_types_, {}, Data::getResultType())
|
||||
{
|
||||
const auto & typed_paths_types = assert_cast<const DataTypeObject &>(*argument_types_[0]).getTypedPaths();
|
||||
typed_paths_type_names.reserve(typed_paths_types.size());
|
||||
for (const auto & [path, type] : typed_paths_types)
|
||||
typed_paths_type_names[path] = type->getName();
|
||||
}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
const auto & object_column = assert_cast<const ColumnObject & >(*columns[0]);
|
||||
this->data(place).add(object_column, row_num, typed_paths_type_names);
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE addBatchSinglePlace(
|
||||
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
|
||||
const override
|
||||
{
|
||||
if (if_argument_pos >= 0 || row_begin != 0 || row_end != columns[0]->size())
|
||||
IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
|
||||
/// Optimization for case when we add all rows from the column into single place.
|
||||
/// In this case we can avoid iterating over all rows because we can get all paths
|
||||
/// and types in JSON column in a more efficient way.
|
||||
else
|
||||
this->data(place).addWholeColumn(assert_cast<const ColumnObject & >(*columns[0]), typed_paths_type_names);
|
||||
}
|
||||
|
||||
void addManyDefaults(
|
||||
AggregateDataPtr __restrict /*place*/,
|
||||
const IColumn ** /*columns*/,
|
||||
size_t /*length*/,
|
||||
Arena * /*arena*/) const override
|
||||
{
|
||||
/// Default value for JSON is empty object, so nothing to add.
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
this->data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
this->data(place).insertResultInto(to);
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<String, String> typed_paths_type_names;
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
AggregateFunctionPtr createAggregateFunctionDistinctJSONPathsAndTypes(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
if (argument_types.size() != 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Incorrect number of arguments for aggregate function {}. Expected single argument with type JSON, got {} arguments", name, argument_types.size());
|
||||
|
||||
if (!isObject(argument_types[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type JSON", argument_types[0]->getName(), name);
|
||||
|
||||
return std::make_shared<AggregateFunctionDistinctJSONPathsAndTypes<Data>>(argument_types);
|
||||
}
|
||||
|
||||
void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("distinctJSONPaths", createAggregateFunctionDistinctJSONPathsAndTypes<AggregateFunctionDistinctJSONPathsData>);
|
||||
factory.registerFunction("distinctJSONPathsAndTypes", createAggregateFunctionDistinctJSONPathsAndTypes<AggregateFunctionDistinctJSONPathsAndTypesData>);
|
||||
}
|
||||
|
||||
}
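
A minimal usage sketch of the two functions registered above, assuming the experimental JSON type is enabled; table and column names are illustrative:

```sql
SET allow_experimental_json_type = 1;

CREATE TABLE json_demo (j JSON) ENGINE = Memory;
INSERT INTO json_demo VALUES ('{"a": 1, "b": "x"}'), ('{"a": 2.5, "c": [1, 2]}');

-- Sorted list of distinct paths seen in the JSON column.
SELECT distinctJSONPaths(j) FROM json_demo;

-- Map from each path to the sorted list of type names it was stored with.
SELECT distinctJSONPathsAndTypes(j) FROM json_demo;
```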
|
@ -89,6 +89,8 @@ void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory);
|
||||
void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory);
|
||||
|
||||
class AggregateFunctionCombinatorFactory;
|
||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||
@ -191,6 +193,8 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionFlameGraph(factory);
|
||||
registerAggregateFunctionKolmogorovSmirnovTest(factory);
|
||||
registerAggregateFunctionLargestTriangleThreeBuckets(factory);
|
||||
registerAggregateFunctionDistinctDynamicTypes(factory);
|
||||
registerAggregateFunctionDistinctJSONPathsAndTypes(factory);
|
||||
|
||||
registerWindowFunctions(factory);
|
||||
}
|
||||
|
@ -2699,14 +2699,6 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
|
||||
ReadBufferFromFile in(file_name);
|
||||
readStringUntilEOF(queries_from_file, in);
|
||||
|
||||
if (!getClientConfiguration().has("log_comment"))
|
||||
{
|
||||
Settings settings = client_context->getSettingsCopy();
|
||||
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
|
||||
settings.log_comment = fs::absolute(fs::path(file_name));
|
||||
client_context->setSettings(settings);
|
||||
}
|
||||
|
||||
return executeMultiQuery(queries_from_file);
|
||||
}
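
The effect of the hunk above, sketched from the query side: when queries are executed from a file and `log_comment` is not set explicitly, the file's absolute path is recorded as the log comment. The path below is illustrative:

```sql
-- Assuming queries were run via `clickhouse-client --queries-file /path/to/queries.sql`
SYSTEM FLUSH LOGS;
SELECT log_comment, query
FROM system.query_log
WHERE log_comment = '/path/to/queries.sql'
LIMIT 5;
```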
|
||||
|
||||
|
@ -979,6 +979,41 @@ ColumnPtr ColumnDynamic::compress() const
|
||||
});
|
||||
}
|
||||
|
||||
String ColumnDynamic::getTypeNameAt(size_t row_num) const
|
||||
{
|
||||
const auto & variant_col = getVariantColumn();
|
||||
const size_t discr = variant_col.globalDiscriminatorAt(row_num);
|
||||
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
return "";
|
||||
|
||||
if (discr == getSharedVariantDiscriminator())
|
||||
{
|
||||
const auto value = getSharedVariant().getDataAt(variant_col.offsetAt(row_num));
|
||||
ReadBufferFromMemory buf(value.data, value.size);
|
||||
return decodeDataType(buf)->getName();
|
||||
}
|
||||
|
||||
return variant_info.variant_names[discr];
|
||||
}
|
||||
|
||||
void ColumnDynamic::getAllTypeNamesInto(std::unordered_set<String> & names) const
|
||||
{
|
||||
auto shared_variant_discr = getSharedVariantDiscriminator();
|
||||
for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
|
||||
{
|
||||
if (i != shared_variant_discr && !variant_column_ptr->getVariantByGlobalDiscriminator(i).empty())
|
||||
names.insert(variant_info.variant_names[i]);
|
||||
}
|
||||
|
||||
const auto & shared_variant = getSharedVariant();
|
||||
for (size_t i = 0; i != shared_variant.size(); ++i)
|
||||
{
|
||||
const auto value = shared_variant.getDataAt(i);
|
||||
ReadBufferFromMemory buf(value.data, value.size);
|
||||
names.insert(decodeDataType(buf)->getName());
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
|
||||
{
|
||||
if (source_columns.empty())
|
||||
|
@ -430,6 +430,9 @@ public:
|
||||
|
||||
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) { return getVariantSerialization(variant_type, variant_type->getName()); }
|
||||
|
||||
String getTypeNameAt(size_t row_num) const;
|
||||
void getAllTypeNamesInto(std::unordered_set<String> & names) const;
|
||||
|
||||
private:
|
||||
void createVariantInfo(const DataTypePtr & variant_type);
|
||||
|
||||
|
src/Common/tests/gtest_config_host_validation.cpp (new file, 69 lines)
@ -0,0 +1,69 @@
|
||||
#include <Poco/AutoPtr.h>
|
||||
#include <Poco/DOM/DOMParser.h>
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
TEST(Common, ConfigHostValidation)
|
||||
{
|
||||
std::string xml(R"CONFIG(<clickhouse>
|
||||
<IPv4_1>0.0.0.0</IPv4_1>
|
||||
<IPv4_2>192.168.0.1</IPv4_2>
|
||||
<IPv4_3>127.0.0.1</IPv4_3>
|
||||
<IPv4_4>255.255.255.255</IPv4_4>
|
||||
<IPv6_1>2001:0db8:85a3:0000:0000:8a2e:0370:7334</IPv6_1>
|
||||
<IPv6_2>2001:DB8::8a2e:370:7334</IPv6_2>
|
||||
<IPv6_3>::1</IPv6_3>
|
||||
<IPv6_4>::</IPv6_4>
|
||||
<Domain_1>www.example.com.</Domain_1>
|
||||
<Domain_2>a.co</Domain_2>
|
||||
<Domain_3>localhost</Domain_3>
|
||||
<Domain_4>xn--fiqs8s.xn--fiqz9s</Domain_4>
|
||||
<IPv4_Invalid_1>192.168.1.256</IPv4_Invalid_1>
|
||||
<IPv4_Invalid_2>192.168.1.1.1</IPv4_Invalid_2>
|
||||
<IPv4_Invalid_3>192.168.1.99999999999999999999</IPv4_Invalid_3>
|
||||
<IPv4_Invalid_4>192.168.1.a</IPv4_Invalid_4>
|
||||
<IPv6_Invalid_1>2001:0db8:85a3:::8a2e:0370:7334</IPv6_Invalid_1>
|
||||
<IPv6_Invalid_2>1200::AB00:1234::2552:7777:1313</IPv6_Invalid_2>
|
||||
<IPv6_Invalid_3>1200::AB00:1234:Q000:2552:7777:1313</IPv6_Invalid_3>
|
||||
<IPv6_Invalid_4>1200:AB00:1234:2552:7777:1313:FFFF</IPv6_Invalid_4>
|
||||
<Domain_Invalid_1>example.com..</Domain_Invalid_1>
|
||||
<Domain_Invalid_2>5example.com</Domain_Invalid_2>
|
||||
<Domain_Invalid_3>example.com-</Domain_Invalid_3>
|
||||
<Domain_Invalid_4>exa_mple.com</Domain_Invalid_4>
|
||||
</clickhouse>)CONFIG");
|
||||
|
||||
Poco::XML::DOMParser dom_parser;
|
||||
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
|
||||
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
|
||||
|
||||
EXPECT_NO_THROW(config->getHost("IPv4_1"));
|
||||
EXPECT_NO_THROW(config->getHost("IPv4_2"));
|
||||
EXPECT_NO_THROW(config->getHost("IPv4_3"));
|
||||
EXPECT_NO_THROW(config->getHost("IPv4_4"));
|
||||
|
||||
EXPECT_NO_THROW(config->getHost("IPv6_1"));
|
||||
EXPECT_NO_THROW(config->getHost("IPv6_2"));
|
||||
EXPECT_NO_THROW(config->getHost("IPv6_3"));
|
||||
EXPECT_NO_THROW(config->getHost("IPv6_4"));
|
||||
|
||||
EXPECT_NO_THROW(config->getHost("Domain_1"));
|
||||
EXPECT_NO_THROW(config->getHost("Domain_2"));
|
||||
EXPECT_NO_THROW(config->getHost("Domain_3"));
|
||||
EXPECT_NO_THROW(config->getHost("Domain_4"));
|
||||
|
||||
EXPECT_THROW(config->getHost("IPv4_Invalid_1"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("IPv4_Invalid_2"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("IPv4_Invalid_3"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("IPv4_Invalid_4"), Poco::SyntaxException);
|
||||
|
||||
EXPECT_THROW(config->getHost("IPv6_Invalid_1"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("IPv6_Invalid_2"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("IPv6_Invalid_3"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("IPv6_Invalid_4"), Poco::SyntaxException);
|
||||
|
||||
EXPECT_THROW(config->getHost("Domain_Invalid_1"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("Domain_Invalid_2"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("Domain_Invalid_3"), Poco::SyntaxException);
|
||||
EXPECT_THROW(config->getHost("Domain_Invalid_4"), Poco::SyntaxException);
|
||||
}
|
@ -17,11 +17,12 @@
|
||||
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Net/MessageHeader.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
#include <Parsers/IdentifierQuotingStyle.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <base/scope_guard.h>
|
||||
#include <Poco/Net/MessageHeader.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -85,7 +86,15 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg
|
||||
/// We use `formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction.
|
||||
/// `getColumnNameWithoutAlias` would return the name of the function with `(arguments)` even if the argument list is empty.
|
||||
if (column)
|
||||
structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false));
|
||||
structure.emplace_back(
|
||||
column->name,
|
||||
column->type->formatWithPossiblyHidingSensitiveData(
|
||||
/*max_length=*/0,
|
||||
/*one_line=*/true,
|
||||
/*show_secrets=*/true,
|
||||
/*print_pretty_type_names=*/false,
|
||||
/*always_quote_identifiers=*/false,
|
||||
/*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks));
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage());
|
||||
}
|
||||
@ -102,7 +111,15 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error);
|
||||
|
||||
for (size_t i = 0; i < type_list_raw->children.size(); ++i)
|
||||
structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false));
|
||||
structure.emplace_back(
|
||||
"_" + toString(i + 1),
|
||||
type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(
|
||||
/*max_length=*/0,
|
||||
/*one_line=*/true,
|
||||
/*show_secrets=*/true,
|
||||
/*print_pretty_type_names=*/false,
|
||||
/*always_quote_identifiers=*/false,
|
||||
/*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks));
|
||||
}
|
||||
|
||||
void BaseExternalTable::initSampleBlock()
|
||||
|
@ -1296,6 +1296,9 @@ class IColumn;
|
||||
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
|
||||
M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", "Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'.", 0) \
|
||||
M(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, "Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple", 0) \
|
||||
\
|
||||
M(Bool, output_format_always_quote_identifiers, false, "Always quote identifiers", 0) \
|
||||
M(IdentifierQuotingStyle, output_format_identifier_quoting_style, IdentifierQuotingStyle::Backticks, "Set the quoting style for identifiers", 0) \
|
||||
|
||||
|
||||
// End of FORMAT_FACTORY_SETTINGS
|
||||
|
@ -75,6 +75,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
|
||||
{"create_if_not_exists", false, false, "New setting."},
|
||||
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
|
||||
{"output_format_always_quote_identifiers", false, false, "New setting."},
|
||||
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."}
|
||||
}
|
||||
},
|
||||
{"24.8",
|
||||
|
@ -244,4 +244,10 @@ IMPLEMENT_SETTING_ENUM(
|
||||
GroupArrayActionWhenLimitReached,
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}})
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(IdentifierQuotingStyle, ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"None", IdentifierQuotingStyle::None},
|
||||
{"Backticks", IdentifierQuotingStyle::Backticks},
|
||||
{"DoubleQuotes", IdentifierQuotingStyle::DoubleQuotes},
|
||||
{"BackticksMySQL", IdentifierQuotingStyle::BackticksMySQL}})
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <Parsers/ASTSQLSecurity.h>
|
||||
#include <Parsers/IdentifierQuotingStyle.h>
|
||||
#include <QueryPipeline/SizeLimits.h>
|
||||
#include <Common/ShellCommandSettings.h>
|
||||
|
||||
@ -351,6 +352,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateT
|
||||
|
||||
DECLARE_SETTING_ENUM(SQLSecurityType)
|
||||
|
||||
DECLARE_SETTING_ENUM(IdentifierQuotingStyle)
|
||||
|
||||
enum class GroupArrayActionWhenLimitReached : uint8_t
|
||||
{
|
||||
THROW,
|
||||
|
@ -210,7 +210,7 @@ namespace
|
||||
{
|
||||
UInt64 stringToMaxThreads(const String & str)
|
||||
{
|
||||
if (startsWith(str, "auto"))
|
||||
if (startsWith(str, "auto") || startsWith(str, "'auto"))
|
||||
return 0;
|
||||
return parseFromString<UInt64>(str);
|
||||
}
|
||||
@ -237,7 +237,8 @@ SettingFieldMaxThreads & SettingFieldMaxThreads::operator=(const Field & f)
|
||||
String SettingFieldMaxThreads::toString() const
|
||||
{
|
||||
if (is_auto)
|
||||
return "auto(" + ::DB::toString(value) + ")";
|
||||
/// Removing quotes here will introduce an incompatibility between replicas with different versions.
|
||||
return "'auto(" + ::DB::toString(value) + ")'";
|
||||
else
|
||||
return ::DB::toString(value);
|
||||
}
|
||||
|
@ -519,10 +519,10 @@ static DataTypePtr createJSON(const ASTPtr & arguments)
|
||||
if (!context)
|
||||
context = Context::getGlobalContextInstance();
|
||||
|
||||
if (context->getSettingsRef().use_json_alias_for_old_object_type)
|
||||
if (context->getSettingsRef().allow_experimental_object_type && context->getSettingsRef().use_json_alias_for_old_object_type)
|
||||
{
|
||||
if (arguments && !arguments->children.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use new JSON type, set setting allow_experimental_json_type = 1");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use new JSON type, set settings allow_experimental_json_type = 1 and use_json_alias_for_old_object_type = 0");
|
||||
|
||||
return std::make_shared<DataTypeObjectDeprecated>("JSON", false);
|
||||
}
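
A sketch of the behavior this hunk changes: the `JSON` name resolves to the deprecated Object type only when `allow_experimental_object_type` is also enabled, and passing arguments to it now produces an error that points at both settings. The table name is illustrative:

```sql
-- JSON resolves to the deprecated Object type only when both settings are on.
SET allow_experimental_object_type = 1, use_json_alias_for_old_object_type = 1;
CREATE TABLE json_alias_demo (j JSON) ENGINE = Memory;

-- To get the new JSON type instead, the error message suggests:
-- SET allow_experimental_json_type = 1, use_json_alias_for_old_object_type = 0;
```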
|
||||
|
@ -302,8 +302,12 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
|
||||
/// Try to determine the type of value inside quotes
|
||||
auto type = tryInferDataTypeForSingleField(data, format_settings);
|
||||
|
||||
/// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
|
||||
if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)))
|
||||
/// Return String type if any of the following conditions applies:
|
||||
/// - we couldn't infer any type
|
||||
/// - it's a number and csv.try_infer_numbers_from_strings = 0
|
||||
/// - it's a tuple and try_infer_strings_from_quoted_tuples = 0
|
||||
/// - it's a Bool type (we don't allow reading bool values from strings)
|
||||
if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)) || isBool(type))
|
||||
return std::make_shared<DataTypeString>();
|
||||
|
||||
return type;
|
||||
|
@ -4134,6 +4134,29 @@ private:
|
||||
};
|
||||
}
|
||||
|
||||
/// Create wrapper only if we support this conversion.
|
||||
WrapperType createWrapperIfCanConvert(const DataTypePtr & from, const DataTypePtr & to) const
|
||||
{
|
||||
try
|
||||
{
|
||||
/// We could avoid try/catch here by implementing a check that two types can be cast, but that
/// requires quite a lot of work. For now, simply use try/catch.
|
||||
/// First, check that we can create a wrapper.
|
||||
WrapperType wrapper = prepareUnpackDictionaries(from, to);
|
||||
/// Second, check if we can perform a conversion on column with default value.
|
||||
/// (we cannot just check empty column as we do some checks only during iteration over rows).
|
||||
auto test_col = from->createColumn();
|
||||
test_col->insertDefault();
|
||||
ColumnsWithTypeAndName column_from = {{test_col->getPtr(), from, "" }};
|
||||
wrapper(column_from, to, nullptr, 1);
|
||||
return wrapper;
|
||||
}
|
||||
catch (const Exception &)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
}
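
A sketch of the user-visible effect, assuming the experimental Variant type is enabled: `accurateCastOrNull` over a Variant column can now produce NULL for variants whose type has no supported conversion to the target type, instead of failing while building the cast. Names and literals are illustrative:

```sql
SET allow_experimental_variant_type = 1;

SELECT accurateCastOrNull(v, 'UInt64') AS u
FROM format(Values, 'v Variant(Array(UInt64), UInt64)', '([1,2]), (42)');
-- Expected: NULL for the array row, 42 for the numeric row.
```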
|
||||
|
||||
WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const
|
||||
{
|
||||
const auto & variant_types = from_variant.getVariants();
|
||||
@ -4142,7 +4165,19 @@ private:
|
||||
|
||||
/// Create conversion wrapper for each variant.
|
||||
for (const auto & variant_type : variant_types)
|
||||
variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type));
|
||||
{
|
||||
WrapperType wrapper;
|
||||
if (cast_type == CastType::accurateOrNull)
|
||||
{
|
||||
/// Create wrapper only if we support conversion from variant to the resulting type.
|
||||
wrapper = createWrapperIfCanConvert(variant_type, to_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
wrapper = prepareUnpackDictionaries(variant_type, to_type);
|
||||
}
|
||||
variant_wrappers.push_back(wrapper);
|
||||
}
|
||||
|
||||
return [variant_wrappers, variant_types, to_type]
|
||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
|
||||
@ -4157,7 +4192,11 @@ private:
|
||||
auto variant_col = column_variant.getVariantPtrByGlobalDiscriminator(i);
|
||||
ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }};
|
||||
const auto & variant_wrapper = variant_wrappers[i];
|
||||
casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
|
||||
ColumnPtr casted_variant;
|
||||
/// Check if we have wrapper for this variant.
|
||||
if (variant_wrapper)
|
||||
casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size());
|
||||
casted_variant_columns.push_back(std::move(casted_variant));
|
||||
}
|
||||
|
||||
/// Second, construct resulting column from casted variant columns according to discriminators.
|
||||
@ -4167,7 +4206,7 @@ private:
|
||||
for (size_t i = 0; i != input_rows_count; ++i)
|
||||
{
|
||||
auto global_discr = column_variant.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR || !casted_variant_columns[global_discr])
|
||||
res->insertDefault();
|
||||
else
|
||||
res->insertFrom(*casted_variant_columns[global_discr], column_variant.offsetAt(i));
|
||||
@ -4357,10 +4396,27 @@ private:
|
||||
casted_variant_columns.reserve(variant_types.size());
|
||||
for (size_t i = 0; i != variant_types.size(); ++i)
|
||||
{
|
||||
/// Skip shared variant, it will be processed later.
|
||||
if (i == column_dynamic.getSharedVariantDiscriminator())
|
||||
{
|
||||
casted_variant_columns.push_back(nullptr);
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i);
|
||||
ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}};
|
||||
auto variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type);
|
||||
casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
|
||||
WrapperType variant_wrapper;
|
||||
if (cast_type == CastType::accurateOrNull)
|
||||
/// Create wrapper only if we support conversion from variant to the resulting type.
|
||||
variant_wrapper = createWrapperIfCanConvert(variant_types[i], result_type);
|
||||
else
|
||||
variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type);
|
||||
|
||||
ColumnPtr casted_variant;
|
||||
/// Check if we have wrapper for this variant.
|
||||
if (variant_wrapper)
|
||||
casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size());
|
||||
casted_variant_columns.push_back(casted_variant);
|
||||
}
|
||||
|
||||
/// Second, collect all variants stored in shared variant and cast them to result type.
|
||||
@ -4416,8 +4472,18 @@ private:
|
||||
for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i)
|
||||
{
|
||||
ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}};
|
||||
auto variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type);
|
||||
casted_shared_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size()));
|
||||
WrapperType variant_wrapper;
|
||||
if (cast_type == CastType::accurateOrNull)
|
||||
/// Create wrapper only if we support conversion from variant to the resulting type.
|
||||
variant_wrapper = createWrapperIfCanConvert(variant_types_from_shared_variant[i], result_type);
|
||||
else
|
||||
variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type);
|
||||
|
||||
ColumnPtr casted_variant;
|
||||
/// Check if we have wrapper for this variant.
|
||||
if (variant_wrapper)
|
||||
casted_variant = variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size());
|
||||
casted_shared_variant_columns.push_back(casted_variant);
|
||||
}
|
||||
|
||||
/// Construct result column from all casted variants.
|
||||
@ -4427,11 +4493,23 @@ private:
|
||||
{
|
||||
auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
{
|
||||
res->insertDefault();
|
||||
}
|
||||
else if (global_discr == shared_variant_discr)
|
||||
{
|
||||
if (casted_shared_variant_columns[shared_variant_indexes[i]])
|
||||
res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]);
|
||||
else
|
||||
res->insertDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (casted_variant_columns[global_discr])
|
||||
res->insertFrom(*casted_variant_columns[global_discr], offsets[i]);
|
||||
else
|
||||
res->insertDefault();
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
|
@ -1,7 +1,8 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -19,16 +20,15 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
/// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')]
|
||||
/// arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']) = [('a', 'd'), ('b', 'e'), ('c', null)]
|
||||
template <bool allow_unaligned>
|
||||
class FunctionArrayZip : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "arrayZip";
|
||||
static constexpr auto name = allow_unaligned ? "arrayZipUnaligned" : "arrayZip";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayZip>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
@ -39,8 +39,11 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
|
||||
"Function {} needs at least one argument; passed {}." , getName(), arguments.size());
|
||||
throw Exception(
|
||||
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
|
||||
"Function {} needs at least one argument; passed {}.",
|
||||
getName(),
|
||||
arguments.size());
|
||||
|
||||
DataTypes arguments_types;
|
||||
for (size_t index = 0; index < arguments.size(); ++index)
|
||||
@ -48,56 +51,142 @@ public:
|
||||
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[index].type.get());
|
||||
|
||||
if (!array_type)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.",
|
||||
toString(index + 1), getName(), arguments[0].type->getName());
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument {} of function {} must be array. Found {} instead.",
|
||||
toString(index + 1),
|
||||
getName(),
|
||||
arguments[0].type->getName());
|
||||
|
||||
arguments_types.emplace_back(array_type->getNestedType());
|
||||
auto nested_type = array_type->getNestedType();
|
||||
if constexpr (allow_unaligned)
|
||||
nested_type = makeNullable(nested_type);
|
||||
arguments_types.emplace_back(nested_type);
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(arguments_types));
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
ColumnPtr
|
||||
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
|
||||
{
|
||||
size_t num_arguments = arguments.size();
|
||||
|
||||
ColumnPtr first_array_column;
|
||||
Columns holders(num_arguments);
|
||||
Columns tuple_columns(num_arguments);
|
||||
|
||||
bool has_unaligned = false;
|
||||
size_t unaligned_index = 0;
|
||||
for (size_t i = 0; i < num_arguments; ++i)
|
||||
{
|
||||
/// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole.
|
||||
ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst();
|
||||
holders[i] = holder;
|
||||
|
||||
const ColumnArray * column_array = checkAndGetColumn<ColumnArray>(holder.get());
|
||||
|
||||
if (!column_array)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. Found column {} instead.",
|
||||
i + 1, getName(), holder->getName());
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
first_array_column = holder;
|
||||
}
|
||||
else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array sizes",
|
||||
i + 1, getName());
|
||||
}
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Argument {} of function {} must be array. Found column {} instead.",
|
||||
i + 1,
|
||||
getName(),
|
||||
holder->getName());
|
||||
tuple_columns[i] = column_array->getDataPtr();
|
||||
|
||||
if (i && !column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*holders[0])))
|
||||
{
|
||||
has_unaligned = true;
|
||||
unaligned_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (!allow_unaligned)
|
||||
{
|
||||
if (has_unaligned)
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array sizes",
|
||||
unaligned_index + 1,
|
||||
getName());
|
||||
else
|
||||
return ColumnArray::create(
|
||||
ColumnTuple::create(tuple_columns), static_cast<const ColumnArray &>(*first_array_column).getOffsetsPtr());
|
||||
ColumnTuple::create(std::move(tuple_columns)), static_cast<const ColumnArray &>(*holders[0]).getOffsetsPtr());
|
||||
}
|
||||
else
|
||||
return executeUnaligned(holders, tuple_columns, input_rows_count, has_unaligned);
|
||||
}
|
||||
|
||||
private:
|
||||
ColumnPtr executeUnaligned(const Columns & holders, Columns & tuple_columns, size_t input_rows_count, bool has_unaligned) const
|
||||
{
|
||||
std::vector<const ColumnArray *> array_columns(holders.size());
|
||||
for (size_t i = 0; i < holders.size(); ++i)
|
||||
array_columns[i] = checkAndGetColumn<ColumnArray>(holders[i].get());
|
||||
|
||||
for (auto & tuple_column : tuple_columns)
|
||||
tuple_column = makeNullable(tuple_column);
|
||||
|
||||
if (!has_unaligned)
|
||||
return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), array_columns[0]->getOffsetsPtr());
|
||||
|
||||
MutableColumns res_tuple_columns(tuple_columns.size());
|
||||
for (size_t i = 0; i < tuple_columns.size(); ++i)
|
||||
{
|
||||
res_tuple_columns[i] = tuple_columns[i]->cloneEmpty();
|
||||
res_tuple_columns[i]->reserve(tuple_columns[i]->size());
|
||||
}
|
||||
|
||||
auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count);
|
||||
auto & res_offsets = assert_cast<ColumnArray::ColumnOffsets &>(*res_offsets_column).getData();
|
||||
size_t curr_offset = 0;
|
||||
for (size_t row_i = 0; row_i < input_rows_count; ++row_i)
|
||||
{
|
||||
size_t max_size = 0;
|
||||
for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i)
|
||||
{
|
||||
const auto * array_column = array_columns[arg_i];
|
||||
const auto & offsets = array_column->getOffsets();
|
||||
size_t array_offset = offsets[row_i - 1];
|
||||
size_t array_size = offsets[row_i] - array_offset;
|
||||
|
||||
res_tuple_columns[arg_i]->insertRangeFrom(*tuple_columns[arg_i], array_offset, array_size);
|
||||
max_size = std::max(max_size, array_size);
|
||||
}
|
||||
|
||||
for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i)
|
||||
{
|
||||
const auto * array_column = array_columns[arg_i];
|
||||
const auto & offsets = array_column->getOffsets();
|
||||
size_t array_offset = offsets[row_i - 1];
|
||||
size_t array_size = offsets[row_i] - array_offset;
|
||||
|
||||
res_tuple_columns[arg_i]->insertManyDefaults(max_size - array_size);
|
||||
}
|
||||
|
||||
curr_offset += max_size;
|
||||
res_offsets[row_i] = curr_offset;
|
||||
}
|
||||
|
||||
return ColumnArray::create(ColumnTuple::create(std::move(res_tuple_columns)), std::move(res_offsets_column));
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_FUNCTION(ArrayZip)
|
||||
{
|
||||
factory.registerFunction<FunctionArrayZip>();
|
||||
factory.registerFunction<FunctionArrayZip<false>>(
|
||||
{.description = R"(
|
||||
Combines multiple arrays into a single array. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments.
|
||||
)",
|
||||
.categories{"String"}});
|
||||
|
||||
factory.registerFunction<FunctionArrayZip<true>>(
|
||||
{.description = R"(
|
||||
Combines multiple arrays into a single array, allowing for unaligned arrays. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments.
|
||||
|
||||
If the arrays have different sizes, the shorter arrays will be padded with `null` values.
|
||||
)",
|
||||
.categories{"String"}}
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
}
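
The examples from the code comments above, as runnable queries:

```sql
SELECT arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']);
-- [('a','d'), ('b','e'), ('c','f')]

SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']);
-- [('a','d'), ('b','e'), ('c', NULL)]  -- shorter arrays are padded with NULL
```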
|
||||
|
||||
|
@ -396,7 +396,7 @@ const ActionsDAG::Node * ActionsDAG::tryFindInOutputs(const std::string & name)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ActionsDAG::NodeRawConstPtrs ActionsDAG::findInOutpus(const Names & names) const
|
||||
ActionsDAG::NodeRawConstPtrs ActionsDAG::findInOutputs(const Names & names) const
|
||||
{
|
||||
NodeRawConstPtrs required_nodes;
|
||||
required_nodes.reserve(names.size());
|
||||
@ -524,7 +524,7 @@ void ActionsDAG::removeUnusedActions(const NameSet & required_names, bool allow_
|
||||
|
||||
void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_remove_inputs, bool allow_constant_folding)
|
||||
{
|
||||
auto required_nodes = findInOutpus(required_names);
|
||||
auto required_nodes = findInOutputs(required_names);
|
||||
outputs.swap(required_nodes);
|
||||
removeUnusedActions(allow_remove_inputs, allow_constant_folding);
|
||||
}
|
||||
|
@ -156,7 +156,7 @@ public:
|
||||
const Node * tryFindInOutputs(const std::string & name) const;
|
||||
|
||||
/// Same, but for the list of names.
|
||||
NodeRawConstPtrs findInOutpus(const Names & names) const;
|
||||
NodeRawConstPtrs findInOutputs(const Names & names) const;
|
||||
|
||||
/// Find first node with the same name in output nodes and replace it.
|
||||
/// If was not found, add node to outputs end.
|
||||
@ -436,7 +436,7 @@ public:
|
||||
/// Returns a list of nodes representing atomic predicates.
|
||||
static NodeRawConstPtrs extractConjunctionAtoms(const Node * predicate);
|
||||
|
||||
/// Get a list of nodes. For every node, check if it can be compused using allowed subset of inputs.
|
||||
/// Get a list of nodes. For every node, check if it can be computed using allowed subset of inputs.
|
||||
/// Returns only those nodes from the list which can be computed.
|
||||
static NodeRawConstPtrs filterNodesByAllowedInputs(
|
||||
NodeRawConstPtrs nodes,
|
||||
|
@ -33,6 +33,8 @@
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
@ -308,16 +310,32 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
|
||||
/* no_squash */ false,
|
||||
/* no_destination */ false,
|
||||
/* async_insert */ false);
|
||||
|
||||
auto table = interpreter.getTable(insert_query);
|
||||
auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);
|
||||
|
||||
if (!FormatFactory::instance().isInputFormat(insert_query.format))
|
||||
{
|
||||
if (insert_query.format.empty() && insert_query.infile)
|
||||
{
|
||||
const auto & in_file_node = insert_query.infile->as<ASTLiteral &>();
|
||||
const auto in_file = in_file_node.value.safeGet<std::string>();
|
||||
const auto in_file_format = FormatFactory::instance().getFormatFromFileName(in_file);
|
||||
if (!FormatFactory::instance().isInputFormat(in_file_format))
|
||||
throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input INFILE format {}", in_file_format);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format);
|
||||
}
|
||||
|
||||
/// For table functions we check access while executing
|
||||
/// InterpreterInsertQuery::getTable() -> ITableFunction::execute().
|
||||
if (insert_query.table_id)
|
||||
query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames());
|
||||
|
||||
insert_query.columns = std::make_shared<ASTExpressionList>();
|
||||
for (const auto & column : sample_block)
|
||||
insert_query.columns->children.push_back(std::make_shared<ASTIdentifier>(column.name));
|
||||
}
|
||||
|
||||
AsynchronousInsertQueue::PushResult
|
||||
@ -696,6 +714,17 @@ catch (...)
|
||||
tryLogCurrentException("AsynchronousInsertQueue", "Failed to add elements to AsynchronousInsertLog");
|
||||
}
|
||||
|
||||
void convertBlockToHeader(Block & block, const Block & header)
|
||||
{
|
||||
auto converting_dag = ActionsDAG::makeConvertingActions(
|
||||
block.getColumnsWithTypeAndName(),
|
||||
header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name);
|
||||
|
||||
auto converting_actions = std::make_shared<ExpressionActions>(std::move(converting_dag));
|
||||
converting_actions->execute(block);
|
||||
}
|
||||
|
||||
String serializeQuery(const IAST & query, size_t max_length)
|
||||
{
|
||||
return query.hasSecretParts()
|
||||
@ -791,6 +820,61 @@ try
|
||||
if (async_insert_log)
|
||||
log_elements.reserve(data->entries.size());
|
||||
|
||||
auto add_entry_to_asynchronous_insert_log = [&, query_by_format = NameToNameMap{}](
|
||||
const InsertData::EntryPtr & entry,
|
||||
const String & parsing_exception,
|
||||
size_t num_rows,
|
||||
size_t num_bytes) mutable
|
||||
{
|
||||
if (!async_insert_log)
|
||||
return;
|
||||
|
||||
AsynchronousInsertLogElement elem;
|
||||
elem.event_time = timeInSeconds(entry->create_time);
|
||||
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
|
||||
elem.database = query_database;
|
||||
elem.table = query_table;
|
||||
elem.format = entry->format;
|
||||
elem.query_id = entry->query_id;
|
||||
elem.bytes = num_bytes;
|
||||
elem.rows = num_rows;
|
||||
elem.exception = parsing_exception;
|
||||
elem.data_kind = entry->chunk.getDataKind();
|
||||
elem.timeout_milliseconds = data->timeout_ms.count();
|
||||
elem.flush_query_id = insert_query_id;
|
||||
|
||||
auto get_query_by_format = [&](const String & format) -> const String &
|
||||
{
|
||||
auto [it, inserted] = query_by_format.try_emplace(format);
|
||||
if (!inserted)
|
||||
return it->second;
|
||||
|
||||
auto query = key.query->clone();
|
||||
assert_cast<ASTInsertQuery &>(*query).format = format;
|
||||
it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
|
||||
return it->second;
|
||||
};
|
||||
|
||||
if (entry->chunk.getDataKind() == DataKind::Parsed)
|
||||
elem.query_for_logging = key.query_str;
|
||||
else
|
||||
elem.query_for_logging = get_query_by_format(entry->format);
|
||||
|
||||
/// If there was a parsing error,
|
||||
/// the entry won't be flushed anyway,
|
||||
/// so add the log element immediately.
|
||||
if (!elem.exception.empty())
|
||||
{
|
||||
elem.status = AsynchronousInsertLogElement::ParsingError;
|
||||
async_insert_log->add(std::move(elem));
|
||||
}
|
||||
else
|
||||
{
|
||||
elem.status = AsynchronousInsertLogElement::Ok;
|
||||
log_elements.push_back(std::move(elem));
|
||||
}
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
interpreter = std::make_unique<InterpreterInsertQuery>(
|
||||
@ -819,49 +903,20 @@ try
catch (...)
{
logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds());

if (async_insert_log)
{
for (const auto & entry : data->entries)
add_entry_to_asynchronous_insert_log(entry, /*parsing_exception=*/ "", /*num_rows=*/ 0, entry->chunk.byteSize());

auto exception = getCurrentExceptionMessage(false);
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, exception);
}
throw;
}

auto add_entry_to_asynchronous_insert_log = [&](const auto & entry,
const auto & entry_query_for_logging,
const auto & exception,
size_t num_rows,
size_t num_bytes,
Milliseconds timeout_ms)
{
if (!async_insert_log)
return;

AsynchronousInsertLogElement elem;
elem.event_time = timeInSeconds(entry->create_time);
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
elem.query_for_logging = entry_query_for_logging;
elem.database = query_database;
elem.table = query_table;
elem.format = entry->format;
elem.query_id = entry->query_id;
elem.bytes = num_bytes;
elem.rows = num_rows;
elem.exception = exception;
elem.data_kind = entry->chunk.getDataKind();
elem.timeout_milliseconds = timeout_ms.count();
elem.flush_query_id = insert_query_id;

/// If there was a parsing error,
/// the entry won't be flushed anyway,
/// so add the log element immediately.
if (!elem.exception.empty())
{
elem.status = AsynchronousInsertLogElement::ParsingError;
async_insert_log->add(std::move(elem));
}
else
{
log_elements.push_back(elem);
}
};

auto finish_entries = [&]
auto finish_entries = [&](size_t num_rows, size_t num_bytes)
{
for (const auto & entry : data->entries)
{
@ -874,20 +929,7 @@ try
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, "");
}
};

Chunk chunk;
auto header = pipeline.getHeader();

if (key.data_kind == DataKind::Parsed)
chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
else
chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_asynchronous_insert_log);

ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());

auto log_and_add_finish_to_query_log = [&](size_t num_rows, size_t num_bytes)
{
LOG_DEBUG(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
queue_shard_flush_time_history.updateWithCurrentTime();

@ -896,16 +938,24 @@ try
query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
};

try
{
Chunk chunk;
auto header = pipeline.getHeader();

if (key.data_kind == DataKind::Parsed)
chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
else
chunk = processPreprocessedEntries(data, header, add_entry_to_asynchronous_insert_log);

ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());

if (chunk.getNumRows() == 0)
{
finish_entries();
log_and_add_finish_to_query_log(0, 0);
finish_entries(/*num_rows=*/ 0, /*num_bytes=*/ 0);
return;
}

try
{
size_t num_rows = chunk.getNumRows();
size_t num_bytes = chunk.bytes();

@ -915,7 +965,7 @@ try
CompletedPipelineExecutor completed_executor(pipeline);
completed_executor.execute();

log_and_add_finish_to_query_log(num_rows, num_bytes);
finish_entries(num_rows, num_bytes);
}
catch (...)
{
@ -929,8 +979,6 @@ try
}
throw;
}

finish_entries();
}
catch (const Exception & e)
{
@ -991,7 +1039,6 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(

StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
auto chunk_info = std::make_shared<AsyncInsertInfo>();
auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);

for (const auto & entry : data->entries)
{
@ -1009,7 +1056,8 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
size_t num_rows = executor.execute(*buffer);

total_rows += num_rows;
/// for some reason, client can pass zero rows and bytes to server.

/// For some reason, client can pass zero rows and bytes to server.
/// We don't update offsets in this case, because we assume every insert has some rows during dedup
/// but we have nothing to deduplicate for this insert.
if (num_rows > 0)
@ -1018,8 +1066,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
chunk_info->tokens.push_back(entry->async_dedup_token);
}

add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms);

add_to_async_insert_log(entry, current_exception, num_rows, num_bytes);
current_exception.clear();
entry->resetChunk();
}
@ -1031,30 +1078,14 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(

template <typename LogFunc>
Chunk AsynchronousInsertQueue::processPreprocessedEntries(
const InsertQuery & key,
const InsertDataPtr & data,
const Block & header,
const ContextPtr & insert_context,
LogFunc && add_to_async_insert_log)
{
size_t total_rows = 0;
auto chunk_info = std::make_shared<AsyncInsertInfo>();
auto result_columns = header.cloneEmptyColumns();

std::unordered_map<String, String> format_to_query;

auto get_query_by_format = [&](const String & format) -> const String &
{
auto [it, inserted] = format_to_query.try_emplace(format);
if (!inserted)
return it->second;

auto query = key.query->clone();
assert_cast<ASTInsertQuery &>(*query).format = format;
it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
return it->second;
};

for (const auto & entry : data->entries)
{
const auto * block = entry->chunk.asBlock();
@ -1062,23 +1093,26 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected entry with data kind Preprocessed. Got: {}", entry->chunk.getDataKind());

auto columns = block->getColumns();
Block block_to_insert = *block;
if (!isCompatibleHeader(block_to_insert, header))
convertBlockToHeader(block_to_insert, header);

auto columns = block_to_insert.getColumns();
for (size_t i = 0, s = columns.size(); i < s; ++i)
result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size());

total_rows += block->rows();
/// for some reason, client can pass zero rows and bytes to server.
total_rows += block_to_insert.rows();

/// For some reason, client can pass zero rows and bytes to server.
/// We don't update offsets in this case, because we assume every insert has some rows during dedup,
/// but we have nothing to deduplicate for this insert.
if (block->rows())
if (block_to_insert.rows() > 0)
{
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
}

const auto & query_for_logging = get_query_by_format(entry->format);
add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms);

add_to_async_insert_log(entry, /*parsing_exception=*/ "", block_to_insert.rows(), block_to_insert.bytes());
entry->resetChunk();
}

@ -288,10 +288,8 @@ private:

template <typename LogFunc>
static Chunk processPreprocessedEntries(
const InsertQuery & key,
const InsertDataPtr & data,
const Block & header,
const ContextPtr & insert_context,
LogFunc && add_to_async_insert_log);

template <typename E>

@ -33,25 +33,19 @@ public:
}
};

/// Duplicate of code from StringUtils.h. Copied here for less dependencies.
static bool startsWith(const std::string & s, const char * prefix)
{
return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix));
}

Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config)
{
String prefix = "lemmatizers";
Poco::Util::AbstractConfiguration::Keys keys;
const String prefix = "lemmatizers";

if (!config.has(prefix))
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "No lemmatizers specified in server config on prefix '{}'", prefix);
return;

Poco::Util::AbstractConfiguration::Keys keys;
config.keys(prefix, keys);

for (const auto & key : keys)
{
if (startsWith(key, "lemmatizer"))
if (key.starts_with("lemmatizer"))
{
const auto & lemm_name = config.getString(prefix + "." + key + ".lang", "");
const auto & lemm_path = config.getString(prefix + "." + key + ".path", "");
@ -81,13 +75,13 @@ Lemmatizers::LemmPtr Lemmatizers::getLemmatizer(const String & name)
if (paths.find(name) != paths.end())
{
if (!std::filesystem::exists(paths[name]))
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Incorrect path to lemmatizer: {}", paths[name]);
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Path to lemmatizer does not exist: {}", paths[name]);

lemmatizers[name] = std::make_shared<Lemmatizer>(paths[name]);
return lemmatizers[name];
}

throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer named: '{}' is not found", name);
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer with the name '{}' was not found in the configuration", name);
}

}

@ -147,6 +147,7 @@ ColumnDependencies getAllColumnDependencies(

bool isStorageTouchedByMutations(
MergeTreeData::DataPartPtr source_part,
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MutationCommand> & commands,
ContextPtr context)
@ -154,7 +155,7 @@ bool isStorageTouchedByMutations(
if (commands.empty())
return false;

auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part);
auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part, mutations_snapshot);
bool all_commands_can_be_skipped = true;

for (const auto & command : commands)
@ -285,8 +286,13 @@ MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(st
{
}

MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_)
: data(&storage_), part(std::move(source_part_))
MutationsInterpreter::Source::Source(
MergeTreeData & storage_,
MergeTreeData::DataPartPtr source_part_,
AlterConversionsPtr alter_conversions_)
: data(&storage_)
, part(std::move(source_part_))
, alter_conversions(std::move(alter_conversions_))
{
}

@ -386,13 +392,14 @@ MutationsInterpreter::MutationsInterpreter(
MutationsInterpreter::MutationsInterpreter(
MergeTreeData & storage_,
MergeTreeData::DataPartPtr source_part_,
AlterConversionsPtr alter_conversions_,
StorageMetadataPtr metadata_snapshot_,
MutationCommands commands_,
Names available_columns_,
ContextPtr context_,
Settings settings_)
: MutationsInterpreter(
Source(storage_, std::move(source_part_)),
Source(storage_, std::move(source_part_), std::move(alter_conversions_)),
std::move(metadata_snapshot_), std::move(commands_),
std::move(available_columns_), std::move(context_), std::move(settings_))
{
@ -1218,8 +1225,10 @@ void MutationsInterpreter::Source::read(
createReadFromPartStep(
MergeTreeSequentialSourceType::Mutation,
plan,
*data, storage_snapshot,
*data,
storage_snapshot,
part,
alter_conversions,
required_columns,
nullptr,
apply_deleted_mask_,

@ -20,6 +20,7 @@ using QueryPipelineBuilderPtr = std::unique_ptr<QueryPipelineBuilder>;
/// Return false if the data isn't going to be changed by mutations.
bool isStorageTouchedByMutations(
MergeTreeData::DataPartPtr source_part,
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MutationCommand> & commands,
ContextPtr context
@ -70,6 +71,7 @@ public:
MutationsInterpreter(
MergeTreeData & storage_,
MergeTreeData::DataPartPtr source_part_,
AlterConversionsPtr alter_conversions_,
StorageMetadataPtr metadata_snapshot_,
MutationCommands commands_,
Names available_columns_,
@ -137,7 +139,7 @@ public:
bool can_execute_) const;

explicit Source(StoragePtr storage_);
Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_);
Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_, AlterConversionsPtr alter_conversions_);

private:
StoragePtr storage;
@ -145,6 +147,7 @@ public:
/// Special case for *MergeTree.
MergeTreeData * data = nullptr;
MergeTreeData::DataPartPtr part;
AlterConversionsPtr alter_conversions;
};

private:

@ -30,6 +30,8 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription()
|
||||
{"id", std::make_shared<DataTypeUInt64>(), "ID of processor."},
|
||||
{"parent_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "Parent processors IDs."},
|
||||
{"plan_step", std::make_shared<DataTypeUInt64>(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."},
|
||||
{"plan_step_name", std::make_shared<DataTypeString>(), "Name of the query plan step which created this processor. The value is empty if the processor was not added from any step."},
|
||||
{"plan_step_description", std::make_shared<DataTypeString>(), "Description of the query plan step which created this processor. The value is empty if the processor was not added from any step."},
|
||||
{"plan_group", std::make_shared<DataTypeUInt64>(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result."},
|
||||
|
||||
{"initial_query_id", std::make_shared<DataTypeString>(), "ID of the initial query (for distributed query execution)."},
|
||||
@ -64,6 +66,8 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const
|
||||
}
|
||||
|
||||
columns[i++]->insert(plan_step);
|
||||
columns[i++]->insert(plan_step_name);
|
||||
columns[i++]->insert(plan_step_description);
|
||||
columns[i++]->insert(plan_group);
|
||||
columns[i++]->insertData(initial_query_id.data(), initial_query_id.size());
|
||||
columns[i++]->insertData(query_id.data(), query_id.size());
|
||||
|
@ -19,6 +19,8 @@ struct ProcessorProfileLogElement
|
||||
|
||||
UInt64 plan_step{};
|
||||
UInt64 plan_group{};
|
||||
String plan_step_name;
|
||||
String plan_step_description;
|
||||
|
||||
String initial_query_id;
|
||||
String query_id;
|
||||
|
@ -478,6 +478,8 @@ void logQueryFinish(
|
||||
processor_elem.parent_ids = std::move(parents);
|
||||
|
||||
processor_elem.plan_step = reinterpret_cast<std::uintptr_t>(processor->getQueryPlanStep());
|
||||
processor_elem.plan_step_name = processor->getPlanStepName();
|
||||
processor_elem.plan_step_description = processor->getPlanStepDescription();
|
||||
processor_elem.plan_group = processor->getQueryPlanStepGroup();
|
||||
|
||||
processor_elem.processor_name = processor->getName();
|
||||
@ -793,7 +795,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
/// Verify that AST formatting is consistent:
|
||||
/// If you format AST, parse it back, and format it again, you get the same string.
|
||||
|
||||
String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false);
|
||||
String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks);
|
||||
|
||||
/// The query can become more verbose after formatting, so:
|
||||
size_t new_max_query_size = max_query_size > 0 ? (1000 + 2 * max_query_size) : 0;
|
||||
@ -818,7 +820,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
|
||||
chassert(ast2);
|
||||
|
||||
String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false);
|
||||
String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks);
|
||||
|
||||
if (formatted1 != formatted2)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
|
@ -26,7 +26,12 @@ inline String format(const SecretHidingFormatSettings & settings)
|
||||
&& settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect);
|
||||
|
||||
return settings.query.formatWithPossiblyHidingSensitiveData(
|
||||
settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names);
|
||||
settings.max_length,
|
||||
settings.one_line,
|
||||
show_secrets,
|
||||
settings.ctx->getSettingsRef().print_pretty_type_names,
|
||||
settings.ctx->getSettingsRef().output_format_always_quote_identifiers,
|
||||
settings.ctx->getSettingsRef().output_format_identifier_quoting_style);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -66,8 +66,8 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo
|
||||
{
|
||||
frame.need_parens = false;
|
||||
|
||||
/// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query.
|
||||
format_settings.ostr << backQuote(name);
|
||||
/// We have to always quote column names to avoid ambiguity with INDEX and other declarations in CREATE query.
|
||||
format_settings.quoteIdentifier(name);
|
||||
|
||||
if (type)
|
||||
{
|
||||
|
@ -35,7 +35,7 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin
|
||||
{
|
||||
frame.need_parens = false;
|
||||
|
||||
settings.ostr << backQuote(name);
|
||||
settings.quoteIdentifier(name);
|
||||
|
||||
if (type)
|
||||
{
|
||||
|
@ -79,7 +79,7 @@ void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & sta
|
||||
}
|
||||
else
|
||||
{
|
||||
s.ostr << backQuoteIfNeed(name);
|
||||
s.writeIdentifier(name);
|
||||
s.ostr << " ";
|
||||
expr->formatImpl(s, state, frame);
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ ASTPtr ASTProjectionDeclaration::clone() const
|
||||
|
||||
void ASTProjectionDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
settings.ostr << backQuoteIfNeed(name);
|
||||
settings.writeIdentifier(name);
|
||||
std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
|
||||
std::string nl_or_nothing = settings.one_line ? "" : "\n";
|
||||
settings.ostr << settings.nl_or_ws << indent_str << "(" << nl_or_nothing;
|
||||
|
@ -22,10 +22,8 @@ ASTPtr ASTTableOverride::clone() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void ASTTableOverride::formatImpl(const FormatSettings & settings_, FormatState & state, FormatStateStacked frame) const
|
||||
void ASTTableOverride::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
FormatSettings settings = settings_;
|
||||
settings.always_quote_identifiers = true;
|
||||
String nl_or_nothing = settings.one_line ? "" : "\n";
|
||||
String nl_or_ws = settings.one_line ? " " : "\n";
|
||||
String hl_keyword = settings.hilite ? hilite_keyword : "";
|
||||
|
@ -165,12 +165,21 @@ size_t IAST::checkDepthImpl(size_t max_depth) const
|
||||
return res;
|
||||
}
|
||||
|
||||
String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const
|
||||
String IAST::formatWithPossiblyHidingSensitiveData(
|
||||
size_t max_length,
|
||||
bool one_line,
|
||||
bool show_secrets,
|
||||
bool print_pretty_type_names,
|
||||
bool always_quote_identifiers,
|
||||
IdentifierQuotingStyle identifier_quoting_style) const
|
||||
{
|
||||
|
||||
WriteBufferFromOwnString buf;
|
||||
FormatSettings settings(buf, one_line);
|
||||
settings.show_secrets = show_secrets;
|
||||
settings.print_pretty_type_names = print_pretty_type_names;
|
||||
settings.always_quote_identifiers = always_quote_identifiers;
|
||||
settings.identifier_quoting_style = identifier_quoting_style;
|
||||
format(settings);
|
||||
return wipeSensitiveDataAndCutToLength(buf.str(), max_length);
|
||||
}
|
||||
@ -248,6 +257,34 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void IAST::FormatSettings::quoteIdentifier(const String & name) const
|
||||
{
|
||||
switch (identifier_quoting_style)
|
||||
{
|
||||
case IdentifierQuotingStyle::None:
|
||||
{
|
||||
writeBackQuotedString(name, ostr);
|
||||
break;
|
||||
}
|
||||
case IdentifierQuotingStyle::Backticks:
|
||||
{
|
||||
writeBackQuotedString(name, ostr);
|
||||
break;
|
||||
}
|
||||
case IdentifierQuotingStyle::DoubleQuotes:
|
||||
{
|
||||
writeDoubleQuotedString(name, ostr);
|
||||
break;
|
||||
}
|
||||
case IdentifierQuotingStyle::BackticksMySQL:
|
||||
{
|
||||
writeBackQuotedStringMySQL(name, ostr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const
|
||||
{
|
||||
String indent_str(indent, '-');
|
||||
|
@ -238,6 +238,9 @@ public:
|
||||
}
|
||||
|
||||
void writeIdentifier(const String & name) const;
|
||||
// Quote identifier `name` even when `always_quote_identifiers` is false.
|
||||
// If `identifier_quoting_style` is `IdentifierQuotingStyle::None`, quote it with `IdentifierQuotingStyle::Backticks`
|
||||
void quoteIdentifier(const String & name) const;
|
||||
};
|
||||
|
||||
/// State. For example, a set of nodes can be remembered, which we already walk through.
|
||||
@ -278,7 +281,13 @@ public:
|
||||
|
||||
/// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied.
|
||||
/// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience.
|
||||
String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const;
|
||||
String formatWithPossiblyHidingSensitiveData(
|
||||
size_t max_length,
|
||||
bool one_line,
|
||||
bool show_secrets,
|
||||
bool print_pretty_type_names,
|
||||
bool always_quote_identifiers,
|
||||
IdentifierQuotingStyle identifier_quoting_style) const;
|
||||
|
||||
/** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent
|
||||
* behaviour is due to the fact such functions are called from Client which knows nothing about
|
||||
@ -287,12 +296,12 @@ public:
|
||||
*/
|
||||
String formatForLogging(size_t max_length = 0) const
|
||||
{
|
||||
return formatWithPossiblyHidingSensitiveData(max_length, true, false, false);
|
||||
return formatWithPossiblyHidingSensitiveData(max_length, true, false, false, false, IdentifierQuotingStyle::Backticks);
|
||||
}
|
||||
|
||||
String formatForErrorMessage() const
|
||||
{
|
||||
return formatWithPossiblyHidingSensitiveData(0, true, false, false);
|
||||
return formatWithPossiblyHidingSensitiveData(0, true, false, false, false, IdentifierQuotingStyle::Backticks);
|
||||
}
|
||||
|
||||
virtual bool hasSecretParts() const { return childrenHaveSecretParts(); }
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Parsers/ParserStringAndSubstitution.h>
|
||||
#include <Parsers/ParserAlterQuery.h>
|
||||
|
||||
#include <Parsers/ASTAlterQuery.h>
|
||||
#include <Parsers/ASTColumnDeclaration.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/CommonParsers.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
@ -9,14 +11,19 @@
|
||||
#include <Parsers/ParserRefreshStrategy.h>
|
||||
#include <Parsers/ParserSelectWithUnionQuery.h>
|
||||
#include <Parsers/ParserSetQuery.h>
|
||||
#include <Parsers/ASTAlterQuery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ParserStringAndSubstitution.h>
|
||||
#include <Parsers/parseDatabaseAndTableName.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SYNTAX_ERROR;
|
||||
}
|
||||
|
||||
bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
auto command = std::make_shared<ASTAlterCommand>();
|
||||
@ -122,7 +129,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
ParserCompoundIdentifier parser_name;
|
||||
ParserStringLiteral parser_string_literal;
|
||||
ParserStringAndSubstitution parser_string_and_substituion;
|
||||
ParserIdentifier parser_remove_property;
|
||||
ParserCompoundColumnDeclaration parser_col_decl;
|
||||
ParserIndexDeclaration parser_idx_decl;
|
||||
ParserStatisticsDeclaration parser_stat_decl;
|
||||
@ -725,8 +731,21 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
if (!parser_modify_col_decl.parse(pos, command_col_decl, expected))
|
||||
return false;
|
||||
|
||||
auto check_no_type = [&](const std::string_view keyword)
|
||||
{
|
||||
const auto & column_decl = command_col_decl->as<const ASTColumnDeclaration &>();
|
||||
|
||||
if (!column_decl.children.empty() || column_decl.null_modifier.has_value() || !column_decl.default_specifier.empty()
|
||||
|| column_decl.ephemeral_default || column_decl.primary_key_specifier)
|
||||
{
|
||||
throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot specify column properties before '{}'", keyword);
|
||||
}
|
||||
};
|
||||
|
||||
if (s_remove.ignore(pos, expected))
|
||||
{
|
||||
check_no_type(s_remove.getName());
|
||||
|
||||
if (s_default.ignore(pos, expected))
|
||||
command->remove_property = toStringView(Keyword::DEFAULT);
|
||||
else if (s_materialized.ignore(pos, expected))
|
||||
@ -746,11 +765,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
}
|
||||
else if (s_modify_setting.ignore(pos, expected))
|
||||
{
|
||||
check_no_type(s_modify_setting.getName());
|
||||
|
||||
if (!parser_settings.parse(pos, command_settings_changes, expected))
|
||||
return false;
|
||||
}
|
||||
else if (s_reset_setting.ignore(pos, expected))
|
||||
{
|
||||
check_no_type(s_reset_setting.getName());
|
||||
|
||||
if (!parser_reset_setting.parse(pos, command_settings_resets, expected))
|
||||
return false;
|
||||
}
|
||||
@ -765,6 +788,11 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
}
|
||||
}
|
||||
command->type = ASTAlterCommand::MODIFY_COLUMN;
|
||||
|
||||
/// Make sure that type is not populated when REMOVE/MODIFY SETTING/RESET SETTING is used, because we wouldn't modify the type, which can be confusing
|
||||
chassert(
|
||||
nullptr == command_col_decl->as<const ASTColumnDeclaration &>().type
|
||||
|| (command->remove_property.empty() && nullptr == command_settings_changes && nullptr == command_settings_resets));
|
||||
}
|
||||
else if (s_modify_order_by.ignore(pos, expected))
|
||||
{
|
||||
|
@ -231,31 +231,31 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest,
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl`\n(PARTITION BY toYYYYMM(created))",
|
||||
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE `tbl`\n(\n PARTITION BY toYYYYMM(`created`)\n)"
|
||||
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n PARTITION BY toYYYYMM(created)\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE=Foo TABLE OVERRIDE `tbl` (), TABLE OVERRIDE a (COLUMNS (_created DateTime MATERIALIZED now())), TABLE OVERRIDE b (PARTITION BY rand())",
|
||||
"CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE `tbl`\n(\n\n),\nTABLE OVERRIDE `a`\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE `b`\n(\n PARTITION BY rand()\n)"
|
||||
"CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE tbl\n(\n\n),\nTABLE OVERRIDE a\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE b\n(\n PARTITION BY rand()\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE tbl (COLUMNS (id UUID) PARTITION BY toYYYYMM(created))",
|
||||
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(`created`)\n)"
|
||||
"CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(created)\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))",
|
||||
"CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n INDEX foo `foo` TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(`_staged` = 1, 'staging', toYYYYMM(`created`))\n)"
|
||||
"CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n INDEX foo foo TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created))\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)",
|
||||
"CREATE DATABASE db\nTABLE OVERRIDE `t1`\n(\n TTL `inserted` + toIntervalMonth(1)\n),\nTABLE OVERRIDE `t2`\n(\n TTL `inserted` + toIntervalMonth(2)\n)"
|
||||
"CREATE DATABASE db\nTABLE OVERRIDE t1\n(\n TTL inserted + toIntervalMonth(1)\n),\nTABLE OVERRIDE t2\n(\n TTL inserted + toIntervalMonth(2)\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw') SETTINGS allows_query_when_mysql_lost = 1 TABLE OVERRIDE tab3 (COLUMNS (_staged UInt8 MATERIALIZED 1) PARTITION BY (c3) TTL c3 + INTERVAL 10 minute), TABLE OVERRIDE tab5 (PARTITION BY (c3) TTL c3 + INTERVAL 10 minute)",
|
||||
"CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE `tab3`\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY `c3`\n TTL `c3` + toIntervalMinute(10)\n),\nTABLE OVERRIDE `tab5`\n(\n PARTITION BY `c3`\n TTL `c3` + toIntervalMinute(10)\n)"
|
||||
"CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE tab3\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n),\nTABLE OVERRIDE tab5\n(\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db TABLE OVERRIDE tbl (PARTITION BY toYYYYMM(created) COLUMNS (created DateTime CODEC(Delta)))",
|
||||
"CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(`created`)\n)"
|
||||
"CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(created)\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1",
|
||||
@ -267,11 +267,11 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest,
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 TABLE OVERRIDE a (ORDER BY (id, version))",
|
||||
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)"
|
||||
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)"
|
||||
},
|
||||
{
|
||||
"CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 COMMENT 'db comment' TABLE OVERRIDE a (ORDER BY (id, version))",
|
||||
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)\nCOMMENT 'db comment'"
|
||||
"CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)\nCOMMENT 'db comment'"
|
||||
}
|
||||
})));
|
||||
|
||||
|
@ -1287,7 +1287,8 @@ void Planner::buildPlanForUnionNode()
|
||||
|
||||
for (const auto & query_node : union_queries_nodes)
|
||||
{
|
||||
Planner query_planner(query_node, select_query_options);
|
||||
Planner query_planner(query_node, select_query_options, planner_context->getGlobalPlannerContext());
|
||||
|
||||
query_planner.buildQueryPlanIfNeeded();
|
||||
for (const auto & row_policy : query_planner.getUsedRowPolicies())
|
||||
used_row_policies.insert(row_policy);
|
||||
|
@ -494,6 +494,12 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
||||
necessary_names.push_back(name);
|
||||
};
|
||||
|
||||
bool is_join_with_special_storage = false;
|
||||
if (const auto * right_table_node = join_node.getRightTableExpression()->as<TableNode>())
|
||||
{
|
||||
is_join_with_special_storage = dynamic_cast<const StorageJoin *>(right_table_node->getStorage().get());
|
||||
}
|
||||
|
||||
for (auto & join_clause : result.join_clauses)
|
||||
{
|
||||
const auto & left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes();
|
||||
@ -561,7 +567,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
||||
if (!left_key_node->result_type->equals(*common_type))
|
||||
left_key_node = &left_join_actions.addCast(*left_key_node, common_type, {});
|
||||
|
||||
if (!right_key_node->result_type->equals(*common_type))
|
||||
if (!is_join_with_special_storage && !right_key_node->result_type->equals(*common_type))
|
||||
right_key_node = &right_join_actions.addCast(*right_key_node, common_type, {});
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <iostream>
|
||||
#include <Processors/IProcessor.h>
|
||||
#include <Processors/QueryPlan/IQueryPlanStep.h>
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -9,6 +10,17 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void IProcessor::setQueryPlanStep(IQueryPlanStep * step, size_t group)
|
||||
{
|
||||
query_plan_step = step;
|
||||
query_plan_step_group = group;
|
||||
if (step)
|
||||
{
|
||||
plan_step_name = step->getName();
|
||||
plan_step_description = step->getStepDescription();
|
||||
}
|
||||
}
|
||||
|
||||
void IProcessor::cancel() noexcept
|
||||
{
|
||||
|
||||
|
@ -311,14 +311,12 @@ public:
|
||||
constexpr static size_t NO_STREAM = std::numeric_limits<size_t>::max();
|
||||
|
||||
/// Step of QueryPlan from which processor was created.
|
||||
void setQueryPlanStep(IQueryPlanStep * step, size_t group = 0)
|
||||
{
|
||||
query_plan_step = step;
|
||||
query_plan_step_group = group;
|
||||
}
|
||||
void setQueryPlanStep(IQueryPlanStep * step, size_t group = 0);
|
||||
|
||||
IQueryPlanStep * getQueryPlanStep() const { return query_plan_step; }
|
||||
size_t getQueryPlanStepGroup() const { return query_plan_step_group; }
|
||||
const String & getPlanStepName() const { return plan_step_name; }
|
||||
const String & getPlanStepDescription() const { return plan_step_description; }
|
||||
|
||||
uint64_t getElapsedNs() const { return elapsed_ns; }
|
||||
uint64_t getInputWaitElapsedNs() const { return input_wait_elapsed_ns; }
|
||||
@ -410,6 +408,8 @@ private:
|
||||
|
||||
IQueryPlanStep * query_plan_step = nullptr;
|
||||
size_t query_plan_step_group = 0;
|
||||
String plan_step_name;
|
||||
String plan_step_description;
|
||||
};
|
||||
|
||||
|
||||
|
@ -757,7 +757,7 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
|
||||
projection_reading = reader.readFromParts(
|
||||
/* parts = */ {},
|
||||
/* alter_conversions = */ {},
|
||||
reading->getMutationsSnapshot()->cloneEmpty(),
|
||||
best_candidate->dag.getRequiredColumnsNames(),
|
||||
proj_snapshot,
|
||||
projection_query_info,
|
||||
|
@ -199,7 +199,7 @@ std::optional<String> optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod
|
||||
|
||||
auto projection_reading = reader.readFromParts(
|
||||
/*parts=*/ {},
|
||||
/*alter_conversions=*/ {},
|
||||
reading->getMutationsSnapshot()->cloneEmpty(),
|
||||
required_columns,
|
||||
proj_snapshot,
|
||||
query_info_copy,
|
||||
|
@ -41,12 +41,19 @@ bool canUseProjectionForReadingStep(ReadFromMergeTree * reading)
|
||||
if (reading->readsInOrder())
|
||||
return false;
|
||||
|
||||
const auto & query_settings = reading->getContext()->getSettingsRef();
|
||||
|
||||
// Currently projection don't support deduplication when moving parts between shards.
|
||||
if (reading->getContext()->getSettingsRef().allow_experimental_query_deduplication)
|
||||
if (query_settings.allow_experimental_query_deduplication)
|
||||
return false;
|
||||
|
||||
// Currently projection don't support settings which implicitly modify aggregate functions.
|
||||
if (reading->getContext()->getSettingsRef().aggregate_functions_null_for_empty)
|
||||
if (query_settings.aggregate_functions_null_for_empty)
|
||||
return false;
|
||||
|
||||
/// Don't use projections if have mutations to apply
|
||||
/// because we need to apply them on original data.
|
||||
if (query_settings.apply_mutations_on_fly && reading->getMutationsSnapshot()->hasDataMutations())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -215,20 +222,15 @@ bool analyzeProjectionCandidate(
|
||||
{
|
||||
MergeTreeData::DataPartsVector projection_parts;
|
||||
MergeTreeData::DataPartsVector normal_parts;
|
||||
std::vector<AlterConversionsPtr> alter_conversions;
|
||||
|
||||
for (const auto & part_with_ranges : parts_with_ranges)
|
||||
{
|
||||
const auto & created_projections = part_with_ranges.data_part->getProjectionParts();
|
||||
auto it = created_projections.find(candidate.projection->name);
|
||||
if (it != created_projections.end() && !it->second->is_broken)
|
||||
{
|
||||
projection_parts.push_back(it->second);
|
||||
}
|
||||
else
|
||||
{
|
||||
normal_parts.push_back(part_with_ranges.data_part);
|
||||
alter_conversions.push_back(part_with_ranges.alter_conversions);
|
||||
}
|
||||
}
|
||||
|
||||
if (projection_parts.empty())
|
||||
@ -241,6 +243,7 @@ bool analyzeProjectionCandidate(
|
||||
|
||||
auto projection_result_ptr = reader.estimateNumMarksToRead(
|
||||
std::move(projection_parts),
|
||||
reading.getMutationsSnapshot()->cloneEmpty(),
|
||||
required_column_names,
|
||||
candidate.projection->metadata,
|
||||
projection_query_info,
|
||||
@ -254,7 +257,7 @@ bool analyzeProjectionCandidate(
|
||||
if (!normal_parts.empty())
|
||||
{
|
||||
/// TODO: We can reuse existing analysis_result by filtering out projection parts
|
||||
auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts), std::move(alter_conversions));
|
||||
auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts));
|
||||
|
||||
if (normal_result_ptr->selected_marks != 0)
|
||||
{
|
||||
|
@ -155,7 +155,7 @@ bool isPartitionKeySuitsGroupByKey(
|
||||
return false;
|
||||
|
||||
/// We are interested only in calculations required to obtain group by keys (and not aggregate function arguments for example).
|
||||
auto key_nodes = group_by_actions.findInOutpus(aggregating.getParams().keys);
|
||||
auto key_nodes = group_by_actions.findInOutputs(aggregating.getParams().keys);
|
||||
auto group_by_key_actions = ActionsDAG::cloneSubDAG(key_nodes, /*remove_aliases=*/ true);
|
||||
|
||||
const auto & gb_key_required_columns = group_by_key_actions.getRequiredColumnsNames();
|
||||
|
@ -229,7 +229,6 @@ public:
|
||||
{
|
||||
ranges_in_data_parts.emplace_back(
|
||||
initial_ranges_in_data_parts[part_index].data_part,
|
||||
initial_ranges_in_data_parts[part_index].alter_conversions,
|
||||
initial_ranges_in_data_parts[part_index].part_index_in_query,
|
||||
MarkRanges{mark_range});
|
||||
part_index_to_initial_ranges_in_data_parts_index[it->second] = part_index;
|
||||
|
@ -173,7 +173,10 @@ Pipe ReadFromMemoryStorageStep::makePipe()
|
||||
|
||||
for (size_t stream = 0; stream < num_streams; ++stream)
|
||||
{
|
||||
pipes.emplace_back(std::make_shared<MemorySource>(columns_to_read, storage_snapshot, current_data, parallel_execution_index));
|
||||
auto source = std::make_shared<MemorySource>(columns_to_read, storage_snapshot, current_data, parallel_execution_index);
|
||||
if (stream == 0)
|
||||
source->addTotalRowsApprox(snapshot_data.rows_approx);
|
||||
pipes.emplace_back(std::move(source));
|
||||
}
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
}
|
||||
|
@ -266,7 +266,7 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c
|
||||
|
||||
ReadFromMergeTree::ReadFromMergeTree(
|
||||
MergeTreeData::DataPartsVector parts_,
|
||||
std::vector<AlterConversionsPtr> alter_conversions_,
|
||||
MergeTreeData::MutationsSnapshotPtr mutations_,
|
||||
Names all_column_names_,
|
||||
const MergeTreeData & data_,
|
||||
const SelectQueryInfo & query_info_,
|
||||
@ -283,7 +283,7 @@ ReadFromMergeTree::ReadFromMergeTree(
|
||||
query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_)
|
||||
, reader_settings(getMergeTreeReaderSettings(context_, query_info_))
|
||||
, prepared_parts(std::move(parts_))
|
||||
, alter_conversions_for_parts(std::move(alter_conversions_))
|
||||
, mutations_snapshot(std::move(mutations_))
|
||||
, all_column_names(std::move(all_column_names_))
|
||||
, data(data_)
|
||||
, actions_settings(ExpressionActionsSettings::fromContext(context_))
|
||||
@ -372,6 +372,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
|
||||
auto pool = std::make_shared<MergeTreeReadPoolParallelReplicas>(
|
||||
std::move(extension),
|
||||
std::move(parts_with_range),
|
||||
mutations_snapshot,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
@ -453,6 +454,7 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
{
|
||||
pool = std::make_shared<MergeTreePrefetchedReadPool>(
|
||||
std::move(parts_with_range),
|
||||
mutations_snapshot,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
@ -466,6 +468,7 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
{
|
||||
pool = std::make_shared<MergeTreeReadPool>(
|
||||
std::move(parts_with_range),
|
||||
mutations_snapshot,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
@ -554,6 +557,7 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
std::move(extension),
|
||||
mode,
|
||||
parts_with_ranges,
|
||||
mutations_snapshot,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
@ -569,6 +573,7 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
has_limit_below_one_block,
|
||||
read_type,
|
||||
parts_with_ranges,
|
||||
mutations_snapshot,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
@ -1038,7 +1043,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
|
||||
}
|
||||
|
||||
ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction);
|
||||
new_parts.emplace_back(part.data_part, part.alter_conversions, part.part_index_in_query, std::move(ranges_to_get_from_part));
|
||||
new_parts.emplace_back(part.data_part, part.part_index_in_query, std::move(ranges_to_get_from_part));
|
||||
}
|
||||
|
||||
splitted_parts_and_ranges.emplace_back(std::move(new_parts));
|
||||
@ -1265,7 +1270,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
|
||||
RangesInDataParts new_parts;
|
||||
|
||||
for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it)
|
||||
new_parts.emplace_back(part_it->data_part, part_it->alter_conversions, part_it->part_index_in_query, part_it->ranges);
|
||||
new_parts.emplace_back(part_it->data_part, part_it->part_index_in_query, part_it->ranges);
|
||||
|
||||
if (new_parts.empty())
|
||||
continue;
|
||||
@ -1378,15 +1383,14 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
|
||||
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(bool find_exact_ranges) const
|
||||
{
|
||||
return selectRangesToRead(prepared_parts, alter_conversions_for_parts, find_exact_ranges);
|
||||
return selectRangesToRead(prepared_parts, find_exact_ranges);
|
||||
}
|
||||
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
MergeTreeData::DataPartsVector parts, std::vector<AlterConversionsPtr> alter_conversions, bool find_exact_ranges) const
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(MergeTreeData::DataPartsVector parts, bool find_exact_ranges) const
|
||||
{
|
||||
return selectRangesToRead(
|
||||
std::move(parts),
|
||||
std::move(alter_conversions),
|
||||
mutations_snapshot,
|
||||
storage_snapshot->metadata,
|
||||
query_info,
|
||||
context,
|
||||
@ -1404,9 +1408,11 @@ static void buildIndexes(
|
||||
const ActionsDAG * filter_actions_dag,
|
||||
const MergeTreeData & data,
|
||||
const MergeTreeData::DataPartsVector & parts,
|
||||
const MergeTreeData::MutationsSnapshotPtr & mutations_snapshot,
|
||||
const ContextPtr & context,
|
||||
const SelectQueryInfo & query_info,
|
||||
const StorageMetadataPtr & metadata_snapshot)
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const LoggerPtr & log)
|
||||
{
|
||||
indexes.reset();
|
||||
|
||||
@ -1432,19 +1438,21 @@ static void buildIndexes(
|
||||
indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */);
|
||||
}
|
||||
|
||||
indexes->part_values
|
||||
= MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context);
|
||||
indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context);
|
||||
MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context);
|
||||
|
||||
indexes->use_skip_indexes = settings.use_skip_indexes;
|
||||
bool final = query_info.isFinal();
|
||||
|
||||
if (final && !settings.use_skip_indexes_if_final)
|
||||
if (query_info.isFinal() && !settings.use_skip_indexes_if_final)
|
||||
indexes->use_skip_indexes = false;
|
||||
|
||||
if (!indexes->use_skip_indexes)
|
||||
return;
|
||||
|
||||
const auto & all_indexes = metadata_snapshot->getSecondaryIndices();
|
||||
|
||||
if (all_indexes.empty())
|
||||
return;
|
||||
|
||||
std::unordered_set<std::string> ignored_index_names;
|
||||
|
||||
if (settings.ignore_data_skipping_indices.changed)
|
||||
@ -1469,15 +1477,34 @@ static void buildIndexes(
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse ignore_data_skipping_indices ('{}')", indices);
|
||||
}
|
||||
|
||||
auto all_updated_columns = mutations_snapshot->getAllUpdatedColumns();
|
||||
|
||||
UsefulSkipIndexes skip_indexes;
|
||||
using Key = std::pair<String, size_t>;
|
||||
std::map<Key, size_t> merged;
|
||||
|
||||
for (const auto & index : metadata_snapshot->getSecondaryIndices())
|
||||
{
|
||||
if (!ignored_index_names.contains(index.name))
|
||||
for (const auto & index : all_indexes)
|
||||
{
|
||||
if (ignored_index_names.contains(index.name))
|
||||
continue;
|
||||
|
||||
auto index_helper = MergeTreeIndexFactory::instance().get(index);
|
||||
|
||||
if (!all_updated_columns.empty())
|
||||
{
|
||||
auto required_columns = index_helper->getColumnsRequiredForIndexCalc();
|
||||
auto it = std::ranges::find_if(required_columns, [&](const auto & column_name)
|
||||
{
|
||||
return all_updated_columns.contains(column_name);
|
||||
});
|
||||
|
||||
if (it != required_columns.end())
|
||||
{
|
||||
LOG_TRACE(log, "Index {} is not used because it depends on column {} which will be updated on fly", index.name, *it);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (index_helper->isMergeable())
|
||||
{
|
||||
auto [it, inserted] = merged.emplace(Key{index_helper->index.type, index_helper->getGranularity()}, skip_indexes.merged_indices.size());
|
||||
@ -1488,10 +1515,11 @@ static void buildIndexes(
|
||||
}
|
||||
|
||||
skip_indexes.merged_indices[it->second].addIndex(index_helper);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
MergeTreeIndexConditionPtr condition;
|
||||
|
||||
if (index_helper->isVectorSimilarityIndex())
|
||||
{
|
||||
#if USE_USEARCH
|
||||
@ -1500,17 +1528,18 @@ static void buildIndexes(
|
||||
#endif
|
||||
if (const auto * legacy_vector_similarity_index = typeid_cast<const MergeTreeIndexLegacyVectorSimilarity *>(index_helper.get()))
|
||||
condition = legacy_vector_similarity_index->createIndexCondition(query_info, context);
|
||||
|
||||
if (!condition)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name);
|
||||
}
|
||||
else
|
||||
{
|
||||
condition = index_helper->createIndexCondition(filter_actions_dag, context);
|
||||
}
|
||||
|
||||
if (!condition->alwaysUnknownOrTrue())
|
||||
skip_indexes.useful_indices.emplace_back(index_helper, condition);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// move minmax indices to first positions, so they will be applied first as cheapest ones
|
||||
std::stable_sort(begin(skip_indexes.useful_indices), end(skip_indexes.useful_indices), [](const auto & l, const auto & r)
|
||||
@ -1547,15 +1576,17 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes)
|
||||
query_info.filter_actions_dag.get(),
|
||||
data,
|
||||
prepared_parts,
|
||||
mutations_snapshot,
|
||||
context,
|
||||
query_info,
|
||||
storage_snapshot->metadata);
|
||||
storage_snapshot->metadata,
|
||||
log);
|
||||
}
|
||||
}
|
||||
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
MergeTreeData::DataPartsVector parts,
|
||||
std::vector<AlterConversionsPtr> alter_conversions,
|
||||
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const SelectQueryInfo & query_info_,
|
||||
ContextPtr context_,
|
||||
@ -1586,7 +1617,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
const Names & primary_key_column_names = primary_key.column_names;
|
||||
|
||||
if (!indexes)
|
||||
buildIndexes(indexes, query_info_.filter_actions_dag.get(), data, parts, context_, query_info_, metadata_snapshot);
|
||||
buildIndexes(indexes, query_info_.filter_actions_dag.get(), data, parts, mutations_snapshot, context_, query_info_, metadata_snapshot, log);
|
||||
|
||||
if (indexes->part_values && indexes->part_values->empty())
|
||||
return std::make_shared<AnalysisResult>(std::move(result));
|
||||
@ -1617,10 +1648,9 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
|
||||
{
|
||||
MergeTreeDataSelectExecutor::filterPartsByPartition(
|
||||
parts,
|
||||
indexes->partition_pruner,
|
||||
indexes->minmax_idx_condition,
|
||||
parts,
|
||||
alter_conversions,
|
||||
indexes->part_values,
|
||||
metadata_snapshot,
|
||||
data,
|
||||
@ -1649,7 +1679,6 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
auto reader_settings = getMergeTreeReaderSettings(context_, query_info_);
|
||||
result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes(
|
||||
std::move(parts),
|
||||
std::move(alter_conversions),
|
||||
metadata_snapshot,
|
||||
context_,
|
||||
indexes->key_condition,
|
||||
|
@ -110,7 +110,7 @@ public:
|
||||
|
||||
ReadFromMergeTree(
|
||||
MergeTreeData::DataPartsVector parts_,
|
||||
std::vector<AlterConversionsPtr> alter_conversions_,
|
||||
MergeTreeData::MutationsSnapshotPtr mutations_snapshot_,
|
||||
Names all_column_names_,
|
||||
const MergeTreeData & data_,
|
||||
const SelectQueryInfo & query_info_,
|
||||
@ -154,7 +154,7 @@ public:
|
||||
|
||||
static AnalysisResultPtr selectRangesToRead(
|
||||
MergeTreeData::DataPartsVector parts,
|
||||
std::vector<AlterConversionsPtr> alter_conversions,
|
||||
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const SelectQueryInfo & query_info,
|
||||
ContextPtr context,
|
||||
@ -166,8 +166,7 @@ public:
|
||||
std::optional<Indexes> & indexes,
|
||||
bool find_exact_ranges);
|
||||
|
||||
AnalysisResultPtr selectRangesToRead(
|
||||
MergeTreeData::DataPartsVector parts, std::vector<AlterConversionsPtr> alter_conversions, bool find_exact_ranges = false) const;
|
||||
AnalysisResultPtr selectRangesToRead(MergeTreeData::DataPartsVector parts, bool find_exact_ranges = false) const;
|
||||
|
||||
AnalysisResultPtr selectRangesToRead(bool find_exact_ranges = false) const;
|
||||
|
||||
@ -188,7 +187,7 @@ public:
|
||||
void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); }
|
||||
|
||||
const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; }
|
||||
const std::vector<AlterConversionsPtr> & getAlterConvertionsForParts() const { return alter_conversions_for_parts; }
|
||||
MergeTreeData::MutationsSnapshotPtr getMutationsSnapshot() const { return mutations_snapshot; }
|
||||
|
||||
const MergeTreeData & getMergeTreeData() const { return data; }
|
||||
size_t getMaxBlockSize() const { return block_size.max_block_size_rows; }
|
||||
@ -209,7 +208,7 @@ private:
|
||||
MergeTreeReaderSettings reader_settings;
|
||||
|
||||
MergeTreeData::DataPartsVector prepared_parts;
|
||||
std::vector<AlterConversionsPtr> alter_conversions_for_parts;
|
||||
MergeTreeData::MutationsSnapshotPtr mutations_snapshot;
|
||||
|
||||
Names all_column_names;
|
||||
|
||||
|
@ -83,7 +83,11 @@ void WriteBufferFromHTTPServerResponse::finishSendHeaders()
|
||||
return;
|
||||
|
||||
if (!headers_started_sending)
|
||||
{
|
||||
if (compression_method != CompressionMethod::None)
|
||||
response.set("Content-Encoding", toContentEncodingName(compression_method));
|
||||
startSendHeaders();
|
||||
}
|
||||
|
||||
writeHeaderSummary();
|
||||
writeExceptionCode();
|
||||
@ -105,7 +109,13 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
|
||||
initialized = true;
|
||||
|
||||
if (compression_method != CompressionMethod::None)
|
||||
{
|
||||
/// If we've already sent headers, just send the `Content-Encoding` down the socket directly
|
||||
if (headers_started_sending)
|
||||
socketSendStr("Content-Encoding: " + toContentEncodingName(compression_method) + "\r\n");
|
||||
else
|
||||
response.set("Content-Encoding", toContentEncodingName(compression_method));
|
||||
}
|
||||
|
||||
startSendHeaders();
|
||||
finishSendHeaders();
|
||||
@ -177,8 +187,12 @@ void WriteBufferFromHTTPServerResponse::finalizeImpl()
|
||||
/// If no body data just send header
|
||||
startSendHeaders();
|
||||
|
||||
/// `finalizeImpl` must be idempotent, so set `initialized` here to not send stuff twice
|
||||
if (!initialized && offset() && compression_method != CompressionMethod::None)
|
||||
{
|
||||
initialized = true;
|
||||
socketSendStr("Content-Encoding: " + toContentEncodingName(compression_method) + "\r\n");
|
||||
}
|
||||
|
||||
finishSendHeaders();
|
||||
}
|
||||
|
@ -271,6 +271,7 @@ public:

/// Return true if the trivial count query could be optimized without reading the data at all
/// in totalRows() or totalRowsByPartitionPredicate() methods or with optimized reading in read() method.
/// 'storage_snapshot' may be nullptr.
virtual bool supportsTrivialCountOptimization(const StorageSnapshotPtr & /*storage_snapshot*/, ContextPtr /*query_context*/) const
{
return false;

@ -9,9 +9,15 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}

bool AlterConversions::supportsMutationCommandType(MutationCommand::Type t)
bool AlterConversions::isSupportedDataMutation(MutationCommand::Type)
{
return t == MutationCommand::Type::RENAME_COLUMN;
/// Currently there is no such mutations. See setting 'apply_mutations_on_fly'.
return false;
}

bool AlterConversions::isSupportedMetadataMutation(MutationCommand::Type type)
{
return type == MutationCommand::Type::RENAME_COLUMN;
}

void AlterConversions::addMutationCommand(const MutationCommand & command)
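For readers skimming the AlterConversions change: the single predicate is split into a data-mutation check (currently always false, pending on-fly data mutations via the 'apply_mutations_on_fly' setting) and a metadata-mutation check (column renames only). A self-contained sketch with stand-in types, assuming nothing beyond what the hunk shows:

#include <cassert>

/// Stand-in for MutationCommand::Type; only the values needed here.
enum class MutationType { RENAME_COLUMN, READ_COLUMN, DELETE, UPDATE };

/// Mirrors AlterConversions::isSupportedDataMutation: no data mutations are applied on the fly yet.
bool isSupportedDataMutation(MutationType) { return false; }

/// Mirrors AlterConversions::isSupportedMetadataMutation: renames only.
bool isSupportedMetadataMutation(MutationType type) { return type == MutationType::RENAME_COLUMN; }

int main()
{
    assert(!isSupportedDataMutation(MutationType::UPDATE));
    assert(isSupportedMetadataMutation(MutationType::RENAME_COLUMN));
    assert(!isSupportedMetadataMutation(MutationType::DELETE));
    return 0;
}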
@ -1,8 +1,8 @@
#pragma once

#include <Storages/MutationCommands.h>
#include <string>
#include <unordered_map>
#include <Interpreters/Context_fwd.h>
#include <Storages/StorageInMemoryMetadata.h>


namespace DB
@ -11,11 +11,17 @@ namespace DB
/// Alter conversions which should be applied on-fly for part.
/// Built from of the most recent mutation commands for part.
/// Now only ALTER RENAME COLUMN is applied.
class AlterConversions : private boost::noncopyable
class AlterConversions : private WithContext, boost::noncopyable
{
public:
AlterConversions() = default;

AlterConversions(StorageMetadataPtr metadata_snapshot_, ContextPtr context_)
: WithContext(context_)
, metadata_snapshot(std::move(metadata_snapshot_))
{
}

struct RenamePair
{
std::string rename_to;
@ -34,11 +40,13 @@ public:
/// Get column old name before rename (lookup by key in rename_map)
std::string getColumnOldName(const std::string & new_name) const;

static bool supportsMutationCommandType(MutationCommand::Type);
static bool isSupportedDataMutation(MutationCommand::Type type);
static bool isSupportedMetadataMutation(MutationCommand::Type type);

private:
/// Rename map new_name -> old_name.
std::vector<RenamePair> rename_map;
StorageMetadataPtr metadata_snapshot;
};

using AlterConversionsPtr = std::shared_ptr<const AlterConversions>;
@ -2052,6 +2052,7 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix
IDataPartStorage::ClonePartParams params
{
.copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication,
.keep_metadata_version = prefix == "covered-by-broken",
.make_source_readonly = true,
.external_transaction = disk_transaction
};

@ -291,6 +291,14 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
if (enabledBlockOffsetColumn(global_ctx))
addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type);

MergeTreeData::IMutationsSnapshot::Params params
{
.metadata_version = global_ctx->metadata_snapshot->getMetadataVersion(),
.min_part_metadata_version = MergeTreeData::getMinMetadataVersion(global_ctx->future_part->parts),
};

auto mutations_snapshot = global_ctx->data->getMutationsSnapshot(params);

SerializationInfo::Settings info_settings =
{
.ratio_of_defaults_for_sparse = global_ctx->data->getSettings()->ratio_of_defaults_for_sparse_serialization,
@ -298,10 +306,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
};

SerializationInfoByName infos(global_ctx->storage_columns, info_settings);
global_ctx->alter_conversions.reserve(global_ctx->future_part->parts.size());

for (const auto & part : global_ctx->future_part->parts)
{
global_ctx->new_data_part->ttl_infos.update(part->ttl_infos);

if (global_ctx->metadata_snapshot->hasAnyTTL() && !part->checkAllTTLCalculated(global_ctx->metadata_snapshot))
{
LOG_INFO(ctx->log, "Some TTL values were not calculated for part {}. Will calculate them forcefully during merge.", part->name);
@ -322,6 +332,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const

infos.add(part_infos);
}

global_ctx->alter_conversions.push_back(MergeTreeData::getAlterConversionsForPart(part, mutations_snapshot, global_ctx->metadata_snapshot, global_ctx->context));
}

const auto & local_part_min_ttl = global_ctx->new_data_part->ttl_infos.part_min_ttl;
@ -879,7 +891,7 @@ MergeTask::VerticalMergeRuntimeContext::PreparedColumnPipeline MergeTask::Vertic
{
/// Read from all parts
std::vector<QueryPlanPtr> plans;
for (const auto & part : global_ctx->future_part->parts)
for (size_t part_num = 0; part_num < global_ctx->future_part->parts.size(); ++part_num)
{
auto plan_for_part = std::make_unique<QueryPlan>();
createReadFromPartStep(
@ -887,7 +899,8 @@ MergeTask::VerticalMergeRuntimeContext::PreparedColumnPipeline MergeTask::Vertic
*plan_for_part,
*global_ctx->data,
global_ctx->storage_snapshot,
part,
global_ctx->future_part->parts[part_num],
global_ctx->alter_conversions[part_num],
Names{column_name},
global_ctx->input_rows_filtered,
/*apply_deleted_mask=*/ true,
@ -1570,7 +1583,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const

/// Read from all parts
std::vector<QueryPlanPtr> plans;
for (const auto & part : global_ctx->future_part->parts)
for (size_t i = 0; i < global_ctx->future_part->parts.size(); ++i)
{
if (part->getMarksCount() == 0)
LOG_TRACE(ctx->log, "Part {} is empty", part->name);
@ -1581,7 +1594,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
*plan_for_part,
*global_ctx->data,
global_ctx->storage_snapshot,
part,
global_ctx->future_part->parts[i],
global_ctx->alter_conversions[i],
global_ctx->merging_columns.getNames(),
global_ctx->input_rows_filtered,
/*apply_deleted_mask=*/ true,
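The MergeTask hunks replace per-part lookups on the storage with a single mutations snapshot taken once per merge, plus one AlterConversions per source part that the read plans later address by index. A rough, self-contained sketch of that bookkeeping; the types and the helper name are stand-ins, the real calls are getMutationsSnapshot() and MergeTreeData::getAlterConversionsForPart() shown above.

#include <cstddef>
#include <memory>
#include <vector>

struct AlterConversions {};
using AlterConversionsPtr = std::shared_ptr<const AlterConversions>;

struct Part {};
struct MutationsSnapshot {};

/// Assumption for illustration: one conversions object per part, derived from
/// the shared snapshot taken at the start of the merge.
AlterConversionsPtr conversionsForPart(const Part &, const MutationsSnapshot &)
{
    return std::make_shared<AlterConversions>();
}

int main()
{
    std::vector<Part> parts(3);
    MutationsSnapshot snapshot; /// taken once per merge

    std::vector<AlterConversionsPtr> alter_conversions;
    alter_conversions.reserve(parts.size());
    for (const auto & part : parts)
        alter_conversions.push_back(conversionsForPart(part, snapshot));

    /// Later the read plans pair parts[i] with alter_conversions[i] by index.
    for (size_t i = 0; i < parts.size(); ++i)
        (void) alter_conversions[i];
    return 0;
}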
@ -166,6 +166,7 @@ private:
StorageSnapshotPtr storage_snapshot{nullptr};
StorageMetadataPtr metadata_snapshot{nullptr};
FutureMergedMutatedPartPtr future_part{nullptr};
std::vector<AlterConversionsPtr> alter_conversions;
/// This will be either nullptr or new_data_part, so raw pointer is ok.
IMergeTreeDataPart * parent_part{nullptr};
ContextPtr context{nullptr};

@ -7156,11 +7156,16 @@ UInt64 MergeTreeData::estimateNumberOfRowsToRead(
ContextPtr query_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const
{
const auto & snapshot_data = assert_cast<const MergeTreeData::SnapshotData &>(*storage_snapshot->data);
const auto & parts = snapshot_data.parts;

MergeTreeDataSelectExecutor reader(*this);
auto result_ptr = reader.estimateNumMarksToRead(
parts, {}, storage_snapshot->metadata, query_info, query_context, query_context->getSettingsRef().max_threads);
snapshot_data.parts,
snapshot_data.mutations_snapshot,
storage_snapshot->metadata->getColumns().getAll().getNames(),
storage_snapshot->metadata,
query_info,
query_context,
query_context->getSettingsRef().max_threads);

UInt64 total_rows = result_ptr->selected_rows;
if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows)
@ -8174,11 +8179,15 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S
return true;
}

AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartPtr part) const
AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(
const MergeTreeDataPartPtr & part,
const MutationsSnapshotPtr & mutations,
const StorageMetadataPtr & metadata,
const ContextPtr & query_context)
{
auto commands = getAlterMutationCommandsForPart(part);
auto commands = mutations->getAlterMutationCommandsForPart(part);
auto result = std::make_shared<AlterConversions>(metadata, query_context);

auto result = std::make_shared<AlterConversions>();
for (const auto & command : commands | std::views::reverse)
result->addMutationCommand(command);

@ -8470,9 +8479,28 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart
DB::updateObjectColumns(object_columns, columns, part->getColumns());
}

bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const
bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const
{
return !hasLightweightDeletedMask();
if (hasLightweightDeletedMask())
return false;

if (!storage_snapshot)
return !query_context->getSettingsRef().apply_mutations_on_fly;

const auto & snapshot_data = assert_cast<const MergeTreeData::SnapshotData &>(*storage_snapshot->data);
return !snapshot_data.mutations_snapshot->hasDataMutations();
}

Int64 MergeTreeData::getMinMetadataVersion(const DataPartsVector & parts)
{
Int64 version = -1;
for (const auto & part : parts)
{
Int64 part_version = part->getMetadataVersion();
if (version == -1 || part_version < version)
version = part_version;
}
return version;
}

StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const
@ -8486,10 +8514,14 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr &
object_columns_copy = object_columns;
}

snapshot_data->alter_conversions.reserve(snapshot_data->parts.size());
for (const auto & part : snapshot_data->parts)
snapshot_data->alter_conversions.push_back(getAlterConversionsForPart(part));
IMutationsSnapshot::Params params
{
.metadata_version = metadata_snapshot->getMetadataVersion(),
.min_part_metadata_version = getMinMetadataVersion(snapshot_data->parts),
.need_data_mutations = query_context->getSettingsRef().apply_mutations_on_fly,
};

snapshot_data->mutations_snapshot = getMutationsSnapshot(params);
return std::make_shared<StorageSnapshot>(*this, metadata_snapshot, std::move(object_columns_copy), std::move(snapshot_data));
}

@ -8707,28 +8739,57 @@ void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key)
}
}

bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic<ssize_t> & alter_conversions_mutations, bool remove)
static void updateMutationsCounters(
Int64 & num_data_mutations_to_apply,
Int64 & num_metadata_mutations_to_apply,
const MutationCommands & commands,
Int64 increment)
{
if (num_data_mutations_to_apply < 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", num_data_mutations_to_apply);

if (num_metadata_mutations_to_apply < 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", num_metadata_mutations_to_apply);

bool has_data_mutation = false;
bool has_metadata_mutation = false;

for (const auto & command : commands)
{
if (AlterConversions::supportsMutationCommandType(command.type))
if (!has_data_mutation && AlterConversions::isSupportedDataMutation(command.type))
{
if (remove)
num_data_mutations_to_apply += increment;
has_data_mutation = true;

if (num_data_mutations_to_apply < 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", num_data_mutations_to_apply);
}

if (!has_metadata_mutation && AlterConversions::isSupportedMetadataMutation(command.type))
{
--alter_conversions_mutations;
if (alter_conversions_mutations < 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly mutations counter is negative ({})", alter_conversions_mutations);
num_metadata_mutations_to_apply += increment;
has_metadata_mutation = true;

if (num_metadata_mutations_to_apply < 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", num_metadata_mutations_to_apply);
}
else
}
}

void incrementMutationsCounters(
Int64 & num_data_mutations_to_apply,
Int64 & num_metadata_mutations_to_apply,
const MutationCommands & commands)
{
if (alter_conversions_mutations < 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly mutations counter is negative ({})", alter_conversions_mutations);
++alter_conversions_mutations;
updateMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, 1);
}
return true;
}
}
return false;

void decrementMutationsCounters(
Int64 & num_data_mutations_to_apply,
Int64 & num_metadata_mutations_to_apply,
const MutationCommands & commands)
{
updateMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, -1);
}

}
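The counter rewrite above replaces the single atomic alter_conversions_mutations with two Int64 counters driven by a shared helper: each command batch bumps the data counter at most once if it contains a supported data mutation, and the metadata counter at most once for a supported metadata mutation, with the same helper reused for increment and decrement. A self-contained model of that behaviour (error checks omitted; types are stand-ins):

#include <cassert>
#include <cstdint>
#include <vector>

enum class MutationType { RENAME_COLUMN, UPDATE };
using MutationCommands = std::vector<MutationType>;

bool isSupportedDataMutation(MutationType) { return false; }
bool isSupportedMetadataMutation(MutationType t) { return t == MutationType::RENAME_COLUMN; }

/// Same shape as the static updateMutationsCounters() in the hunk: count each
/// kind at most once per command batch, signed by `increment`.
void updateMutationsCounters(int64_t & num_data, int64_t & num_metadata,
                             const MutationCommands & commands, int64_t increment)
{
    bool counted_data = false;
    bool counted_metadata = false;
    for (auto type : commands)
    {
        if (!counted_data && isSupportedDataMutation(type)) { num_data += increment; counted_data = true; }
        if (!counted_metadata && isSupportedMetadataMutation(type)) { num_metadata += increment; counted_metadata = true; }
    }
}

int main()
{
    int64_t num_data = 0, num_metadata = 0;
    MutationCommands commands{MutationType::RENAME_COLUMN, MutationType::UPDATE};
    updateMutationsCounters(num_data, num_metadata, commands, +1); /// incrementMutationsCounters
    assert(num_data == 0 && num_metadata == 1);
    updateMutationsCounters(num_data, num_metadata, commands, -1); /// decrementMutationsCounters
    assert(num_data == 0 && num_metadata == 0);
    return 0;
}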
@ -443,14 +443,53 @@ public:

bool areAsynchronousInsertsEnabled() const override;

bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override;
bool supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const override;

/// A snapshot of pending mutations that weren't applied to some of the parts yet
/// and should be applied on the fly (i.e. when reading from the part).
/// Mutations not supported by AlterConversions (supportsMutationCommandType()) can be omitted.
struct IMutationsSnapshot
{
/// Contains info that doesn't depend on state of mutations.
struct Params
{
Int64 metadata_version = -1;
Int64 min_part_metadata_version = -1;
bool need_data_mutations = false;
};

/// Contains info that depends on state of mutations.
struct Info
{
Int64 num_data_mutations = 0;
Int64 num_metadata_mutations = 0;
};

Params params;
Info info;

IMutationsSnapshot() = default;
IMutationsSnapshot(Params params_, Info info_): params(std::move(params_)), info(std::move(info_)) {}

/// Returns mutation commands that are required to be applied to the `part`.
/// @return list of mutation commands, in *reverse* order (newest to oldest)
virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0;
virtual std::shared_ptr<IMutationsSnapshot> cloneEmpty() const = 0;
virtual NameSet getAllUpdatedColumns() const = 0;

bool hasDataMutations() const { return params.need_data_mutations && info.num_data_mutations > 0; }

virtual ~IMutationsSnapshot() = default;
};

using MutationsSnapshotPtr = std::shared_ptr<const IMutationsSnapshot>;

/// Snapshot for MergeTree contains the current set of data parts
/// at the moment of the start of query.
/// and mutations required to be applied at the moment of the start of query.
struct SnapshotData : public StorageSnapshot::Data
{
DataPartsVector parts;
std::vector<AlterConversionsPtr> alter_conversions;
MutationsSnapshotPtr mutations_snapshot;
};

StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override;
@ -929,8 +968,18 @@ public:

Disks getDisks() const { return getStoragePolicy()->getDisks(); }

/// Returns a snapshot of mutations that probably will be applied on the fly to parts during reading.
virtual MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & params) const = 0;

/// Returns the minimum version of metadata among parts.
static Int64 getMinMetadataVersion(const DataPartsVector & parts);

/// Return alter conversions for part which must be applied on fly.
AlterConversionsPtr getAlterConversionsForPart(MergeTreeDataPartPtr part) const;
static AlterConversionsPtr getAlterConversionsForPart(
const MergeTreeDataPartPtr & part,
const MutationsSnapshotPtr & mutations,
const StorageMetadataPtr & metadata,
const ContextPtr & query_context);

/// Returns destination disk or volume for the TTL rule according to current storage policy.
SpacePtr getDestinationForMoveTTL(const TTLDescription & move_ttl) const;
@ -1450,13 +1499,6 @@ protected:
/// mechanisms for parts locking
virtual bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const = 0;

/// Return pending mutations that weren't applied to `part` yet and should be applied on the fly
/// (i.e. when reading from the part). Mutations not supported by AlterConversions
/// (supportsMutationCommandType()) can be omitted.
///
/// @return list of mutations, in *reverse* order (newest to oldest)
virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0;

struct PartBackupEntries
{
String part_name;
@ -1731,7 +1773,14 @@ struct CurrentlySubmergingEmergingTagger
};

/// Look at MutationCommands if it contains mutations for AlterConversions, update the counter.
/// Return true if the counter had been updated
bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic<ssize_t> & alter_conversions_mutations, bool remove);
void incrementMutationsCounters(
Int64 & num_data_mutations_to_apply,
Int64 & num_metadata_mutations_to_apply,
const MutationCommands & commands);

void decrementMutationsCounters(
Int64 & num_data_mutations_to_apply,
Int64 & num_metadata_mutations_to_apply,
const MutationCommands & commands);

}
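The IMutationsSnapshot declaration above separates query-independent Params from state-dependent Info, and hasDataMutations() is what gates the trivial-count optimization in MergeTreeData::supportsTrivialCountOptimization(). A compilable stand-in that keeps only those fields (not the real interface):

#include <cassert>
#include <cstdint>

/// Stand-in for MergeTreeData::IMutationsSnapshot with just the pieces used by
/// the trivial-count check.
struct MutationsSnapshot
{
    struct Params
    {
        int64_t metadata_version = -1;
        int64_t min_part_metadata_version = -1;
        bool need_data_mutations = false; /// driven by the apply_mutations_on_fly setting
    };

    struct Info
    {
        int64_t num_data_mutations = 0;
        int64_t num_metadata_mutations = 0;
    };

    Params params;
    Info info;

    bool hasDataMutations() const { return params.need_data_mutations && info.num_data_mutations > 0; }
};

int main()
{
    MutationsSnapshot snapshot;
    snapshot.params.need_data_mutations = true;
    snapshot.info.num_data_mutations = 2;
    /// Pending on-fly data mutations disable the trivial count optimization.
    assert(snapshot.hasDataMutations());
    return 0;
}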
@ -133,12 +133,10 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
bool enable_parallel_reading) const
{
const auto & snapshot_data = assert_cast<const MergeTreeData::SnapshotData &>(*storage_snapshot->data);
const auto & parts = snapshot_data.parts;
const auto & alter_conversions = snapshot_data.alter_conversions;

auto step = readFromParts(
parts,
alter_conversions,
snapshot_data.parts,
snapshot_data.mutations_snapshot,
column_names_to_return,
storage_snapshot,
query_info,
@ -500,10 +498,9 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
}

void MergeTreeDataSelectExecutor::filterPartsByPartition(
MergeTreeData::DataPartsVector & parts,
const std::optional<PartitionPruner> & partition_pruner,
const std::optional<KeyCondition> & minmax_idx_condition,
MergeTreeData::DataPartsVector & parts,
std::vector<AlterConversionsPtr> & alter_conversions,
const std::optional<std::unordered_set<String>> & part_values,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreeData & data,
@ -512,8 +509,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition(
LoggerPtr log,
ReadFromMergeTree::IndexStats & index_stats)
{
chassert(alter_conversions.empty() || parts.size() == alter_conversions.size());

const Settings & settings = context->getSettingsRef();
DataTypes minmax_columns_types;

@ -537,7 +532,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition(
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
selectPartsToReadWithUUIDFilter(
parts,
alter_conversions,
part_values,
data.getPinnedPartUUIDs(),
minmax_idx_condition,
@ -550,7 +544,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition(
else
selectPartsToRead(
parts,
alter_conversions,
part_values,
minmax_idx_condition,
minmax_columns_types,
@ -589,7 +582,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition(

RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes(
MergeTreeData::DataPartsVector && parts,
std::vector<AlterConversionsPtr> && alter_conversions,
StorageMetadataPtr metadata_snapshot,
const ContextPtr & context,
const KeyCondition & key_condition,
@ -602,8 +594,6 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
bool use_skip_indexes,
bool find_exact_ranges)
{
chassert(alter_conversions.empty() || parts.size() == alter_conversions.size());

RangesInDataParts parts_with_ranges;
parts_with_ranges.resize(parts.size());
const Settings & settings = context->getSettingsRef();
@ -662,11 +652,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
auto process_part = [&](size_t part_index)
{
auto & part = parts[part_index];
auto alter_conversions_for_part = !alter_conversions.empty()
? alter_conversions[part_index]
: std::make_shared<AlterConversions>();

RangesInDataPart ranges(part, alter_conversions_for_part, part_index);
RangesInDataPart ranges(part, part_index);
size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal();

if (metadata_snapshot->hasPrimaryKey() || part_offset_condition)
@ -904,6 +891,7 @@ std::shared_ptr<QueryIdHolder> MergeTreeDataSelectExecutor::checkLimits(

ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead(
MergeTreeData::DataPartsVector parts,
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
const Names & column_names_to_return,
const StorageMetadataPtr & metadata_snapshot,
const SelectQueryInfo & query_info,
@ -916,11 +904,9 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
return std::make_shared<ReadFromMergeTree::AnalysisResult>();

std::optional<ReadFromMergeTree::Indexes> indexes;
/// NOTE: We don't need alter_conversions because the returned analysis_result is only used for:
/// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions.
return ReadFromMergeTree::selectRangesToRead(
std::move(parts),
/*alter_conversions=*/{},
mutations_snapshot,
metadata_snapshot,
query_info,
context,
@ -935,7 +921,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar

QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts(
MergeTreeData::DataPartsVector parts,
std::vector<AlterConversionsPtr> alter_conversions,
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
const Names & column_names_to_return,
const StorageSnapshotPtr & storage_snapshot,
const SelectQueryInfo & query_info,
@ -957,7 +943,7 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts(

return std::make_unique<ReadFromMergeTree>(
std::move(parts),
std::move(alter_conversions),
std::move(mutations_snapshot),
column_names_to_return,
data,
query_info,
@ -1554,7 +1540,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(

void MergeTreeDataSelectExecutor::selectPartsToRead(
MergeTreeData::DataPartsVector & parts,
std::vector<AlterConversionsPtr> & alter_conversions,
const std::optional<std::unordered_set<String>> & part_values,
const std::optional<KeyCondition> & minmax_idx_condition,
const DataTypes & minmax_columns_types,
@ -1563,14 +1548,11 @@ void MergeTreeDataSelectExecutor::selectPartsToRead(
PartFilterCounters & counters)
{
MergeTreeData::DataPartsVector prev_parts;
std::vector<AlterConversionsPtr> prev_conversions;

std::swap(prev_parts, parts);
std::swap(prev_conversions, alter_conversions);

for (size_t i = 0; i < prev_parts.size(); ++i)
for (const auto & part_or_projection : prev_parts)
{
const auto * part = prev_parts[i]->isProjectionPart() ? prev_parts[i]->getParentPart() : prev_parts[i].get();
const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get();
if (part_values && part_values->find(part->name) == part_values->end())
continue;

@ -1607,15 +1589,12 @@ void MergeTreeDataSelectExecutor::selectPartsToRead(
counters.num_parts_after_partition_pruner += 1;
counters.num_granules_after_partition_pruner += num_granules;

parts.push_back(prev_parts[i]);
if (!prev_conversions.empty())
alter_conversions.push_back(prev_conversions[i]);
parts.push_back(part_or_projection);
}
}

void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
MergeTreeData::DataPartsVector & parts,
std::vector<AlterConversionsPtr> & alter_conversions,
const std::optional<std::unordered_set<String>> & part_values,
MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids,
const std::optional<KeyCondition> & minmax_idx_condition,
@ -1628,22 +1607,17 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
{
/// process_parts prepare parts that have to be read for the query,
/// returns false if duplicated parts' UUID have been met
auto select_parts = [&] (
MergeTreeData::DataPartsVector & selected_parts,
std::vector<AlterConversionsPtr> & selected_conversions) -> bool
auto select_parts = [&](MergeTreeData::DataPartsVector & selected_parts) -> bool
{
auto ignored_part_uuids = query_context->getIgnoredPartUUIDs();
std::unordered_set<UUID> temp_part_uuids;

MergeTreeData::DataPartsVector prev_parts;
std::vector<AlterConversionsPtr> prev_conversions;

std::swap(prev_parts, selected_parts);
std::swap(prev_conversions, selected_conversions);

for (size_t i = 0; i < prev_parts.size(); ++i)
for (const auto & part_or_projection : prev_parts)
{
const auto * part = prev_parts[i]->isProjectionPart() ? prev_parts[i]->getParentPart() : prev_parts[i].get();
const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get();
if (part_values && part_values->find(part->name) == part_values->end())
continue;

@ -1693,9 +1667,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
throw Exception(ErrorCodes::LOGICAL_ERROR, "Found a part with the same UUID on the same replica.");
}

selected_parts.push_back(prev_parts[i]);
if (!prev_conversions.empty())
selected_conversions.push_back(prev_conversions[i]);
selected_parts.push_back(part_or_projection);
}

if (!temp_part_uuids.empty())
@ -1714,7 +1686,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
};

/// Process parts that have to be read for a query.
auto needs_retry = !select_parts(parts, alter_conversions);
auto needs_retry = !select_parts(parts);

/// If any duplicated part UUIDs met during the first step, try to ignore them in second pass.
/// This may happen when `prefer_localhost_replica` is set and "distributed" stage runs in the same process with "remote" stage.
@ -1725,7 +1697,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
counters = PartFilterCounters();

/// Second attempt didn't help, throw an exception
if (!select_parts(parts, alter_conversions))
if (!select_parts(parts))
throw Exception(ErrorCodes::DUPLICATED_PART_UUIDS, "Found duplicate UUIDs while processing query.");
}
}
@ -40,7 +40,7 @@ public:
/// The same as read, but with specified set of parts.
QueryPlanStepPtr readFromParts(
MergeTreeData::DataPartsVector parts,
std::vector<AlterConversionsPtr> alter_conversions,
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
const SelectQueryInfo & query_info,
@ -56,6 +56,7 @@ public:
/// This method is used to select best projection for table.
ReadFromMergeTree::AnalysisResultPtr estimateNumMarksToRead(
MergeTreeData::DataPartsVector parts,
MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
const SelectQueryInfo & query_info,
@ -120,7 +121,6 @@ private:
/// as well as `max_block_number_to_read`.
static void selectPartsToRead(
MergeTreeData::DataPartsVector & parts,
std::vector<AlterConversionsPtr> & alter_conversions,
const std::optional<std::unordered_set<String>> & part_values,
const std::optional<KeyCondition> & minmax_idx_condition,
const DataTypes & minmax_columns_types,
@ -131,7 +131,6 @@ private:
/// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
static void selectPartsToReadWithUUIDFilter(
MergeTreeData::DataPartsVector & parts,
std::vector<AlterConversionsPtr> & alter_conversions,
const std::optional<std::unordered_set<String>> & part_values,
MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids,
const std::optional<KeyCondition> & minmax_idx_condition,
@ -175,10 +174,9 @@ public:

/// Filter parts using minmax index and partition key.
static void filterPartsByPartition(
MergeTreeData::DataPartsVector & parts,
const std::optional<PartitionPruner> & partition_pruner,
const std::optional<KeyCondition> & minmax_idx_condition,
MergeTreeData::DataPartsVector & parts,
std::vector<AlterConversionsPtr> & alter_conversions,
const std::optional<std::unordered_set<String>> & part_values,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreeData & data,
@ -192,7 +190,6 @@ public:
/// If 'check_limits = true' it will throw exception if the amount of data exceed the limits from settings.
static RangesInDataParts filterPartsByPrimaryKeyAndSkipIndexes(
MergeTreeData::DataPartsVector && parts,
std::vector<AlterConversionsPtr> && alter_conversions,
StorageMetadataPtr metadata_snapshot,
const ContextPtr & context,
const KeyCondition & key_condition,
@ -50,7 +50,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_)
MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number,
const TransactionID & tid_, const WriteSettings & settings)
: create_time(time(nullptr))
, commands(std::move(commands_))
, commands(std::make_shared<MutationCommands>(std::move(commands_)))
, disk(std::move(disk_))
, path_prefix(path_prefix_)
, file_name("tmp_mutation_" + toString(tmp_number) + ".txt")
@ -63,7 +63,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP
*out << "format version: 1\n"
<< "create time: " << LocalDateTime(create_time, DateLUT::serverTimezoneInstance()) << "\n";
*out << "commands: ";
commands.writeText(*out, /* with_pure_metadata_commands = */ false);
commands->writeText(*out, /* with_pure_metadata_commands = */ false);
*out << "\n";
if (tid.isPrehistoric())
{
@ -116,7 +116,8 @@ void MergeTreeMutationEntry::writeCSN(CSN csn_)
}

MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & path_prefix_, const String & file_name_)
: disk(std::move(disk_))
: commands(std::make_shared<MutationCommands>())
, disk(std::move(disk_))
, path_prefix(path_prefix_)
, file_name(file_name_)
, is_temp(false)
@ -133,7 +134,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & pat
create_time_dt.hour(), create_time_dt.minute(), create_time_dt.second());

*buf >> "commands: ";
commands.readText(*buf);
commands->readText(*buf);
*buf >> "\n";

if (buf->eof())
@ -177,7 +178,7 @@ std::shared_ptr<const IBackupEntry> MergeTreeMutationEntry::backup() const
out << "block number: " << block_number << "\n";

out << "commands: ";
commands.writeText(out, /* with_pure_metadata_commands = */ false);
commands->writeText(out, /* with_pure_metadata_commands = */ false);
out << "\n";

return std::make_shared<BackupEntryFromMemory>(out.str());

@ -16,7 +16,7 @@ class IBackupEntry;
struct MergeTreeMutationEntry
{
time_t create_time = 0;
MutationCommands commands;
std::shared_ptr<MutationCommands> commands;

DiskPtr disk;
String path_prefix;
@ -85,6 +85,7 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get()

MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -95,6 +96,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(mutations_snapshot_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,
@ -103,7 +105,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
column_names_,
settings_,
context_)
, WithContext(context_)
, prefetch_threadpool(getContext()->getPrefetchThreadpool())
, log(getLogger("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")"))
{

@ -14,11 +14,12 @@ using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
/// A class which is responsible for creating read tasks
/// which are later taken by readers via getTask method.
/// Does prefetching for the read tasks it creates.
class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithContext
class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase
{
public:
MergeTreePrefetchedReadPool(
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,

@ -29,6 +29,7 @@ extern const int BAD_ARGUMENTS;

MergeTreeReadPool::MergeTreeReadPool(
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -39,6 +40,7 @@ MergeTreeReadPool::MergeTreeReadPool(
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(mutations_snapshot_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,

@ -26,6 +26,7 @@ public:

MergeTreeReadPool(
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -4,9 +4,6 @@
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>

#include <cmath>


namespace DB
{

@ -17,6 +14,7 @@ namespace ErrorCodes

MergeTreeReadPoolBase::MergeTreeReadPoolBase(
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -25,7 +23,9 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase(
const Names & column_names_,
const PoolSettings & pool_settings_,
const ContextPtr & context_)
: parts_ranges(std::move(parts_))
: WithContext(context_)
, parts_ranges(std::move(parts_))
, mutations_snapshot(std::move(mutations_snapshot_))
, shared_virtual_fields(std::move(shared_virtual_fields_))
, storage_snapshot(storage_snapshot_)
, prewhere_info(prewhere_info_)
@ -120,9 +120,9 @@ void MergeTreeReadPoolBase::fillPerPartInfos(const Settings & settings)
}

read_task_info.part_index_in_query = part_with_ranges.part_index_in_query;
read_task_info.alter_conversions = part_with_ranges.alter_conversions;
read_task_info.alter_conversions = MergeTreeData::getAlterConversionsForPart(part_with_ranges.data_part, mutations_snapshot, storage_snapshot->metadata, getContext());

LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions);
LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, read_task_info.alter_conversions);

read_task_info.task_columns = getReadTaskColumns(
part_info,

@ -6,9 +6,11 @@
namespace DB
{

class MergeTreeReadPoolBase : public IMergeTreeReadPool
class MergeTreeReadPoolBase : public IMergeTreeReadPool, protected WithContext
{
public:
using MutationsSnapshotPtr = MergeTreeData::MutationsSnapshotPtr;

struct PoolSettings
{
size_t threads = 0;
@ -23,6 +25,7 @@ public:

MergeTreeReadPoolBase(
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -37,6 +40,7 @@ public:
protected:
/// Initialized in constructor
const RangesInDataParts parts_ranges;
const MutationsSnapshotPtr mutations_snapshot;
const VirtualFields shared_virtual_fields;
const StorageSnapshotPtr storage_snapshot;
const PrewhereInfoPtr prewhere_info;
@ -12,6 +12,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder(
bool has_limit_below_one_block_,
MergeTreeReadType read_type_,
RangesInDataParts parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -22,6 +23,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder(
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(mutations_snapshot_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,

@ -11,6 +11,7 @@ public:
bool has_limit_below_one_block_,
MergeTreeReadType read_type_,
RangesInDataParts parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,

@ -14,6 +14,7 @@ namespace ErrorCodes
MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
ParallelReadingExtension extension_,
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -24,6 +25,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(mutations_snapshot_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,

@ -11,6 +11,7 @@ public:
MergeTreeReadPoolParallelReplicas(
ParallelReadingExtension extension_,
RangesInDataParts && parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,

@ -13,6 +13,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd
ParallelReadingExtension extension_,
CoordinationMode mode_,
RangesInDataParts parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -23,6 +24,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(mutations_snapshot_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,

@ -12,6 +12,7 @@ public:
ParallelReadingExtension extension_,
CoordinationMode mode_,
RangesInDataParts parts_,
MutationsSnapshotPtr mutations_snapshot_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
@ -34,6 +34,7 @@ public:
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
MergeTreeData::DataPartPtr data_part_,
AlterConversionsPtr alter_conversions_,
Names columns_to_read_,
std::optional<MarkRanges> mark_ranges_,
bool apply_deleted_mask,
@ -58,6 +59,9 @@ private:
/// Data part will not be removed if the pointer owns it
MergeTreeData::DataPartPtr data_part;

/// Alter and mutation commands that are required to be applied to the part on-fly.
AlterConversionsPtr alter_conversions;

/// Columns we have to read (each Block from read will contain them)
Names columns_to_read;

@ -87,6 +91,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
MergeTreeData::DataPartPtr data_part_,
AlterConversionsPtr alter_conversions_,
Names columns_to_read_,
std::optional<MarkRanges> mark_ranges_,
bool apply_deleted_mask,
@ -96,6 +101,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
, storage(storage_)
, storage_snapshot(storage_snapshot_)
, data_part(std::move(data_part_))
, alter_conversions(std::move(alter_conversions_))
, columns_to_read(std::move(columns_to_read_))
, read_with_direct_io(read_with_direct_io_)
, mark_ranges(std::move(mark_ranges_))
@ -109,8 +115,6 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
LOG_DEBUG(log, "Reading {} marks from part {}, total {} rows starting from the beginning of the part",
data_part->getMarksCount(), data_part->name, data_part->rows_count);

auto alter_conversions = storage.getAlterConversionsForPart(data_part);

/// Note, that we don't check setting collaborate_with_coordinator presence, because this source
/// is only used in background merges.
addTotalRowsApprox(data_part->rows_count);
@ -299,6 +303,7 @@ Pipe createMergeTreeSequentialSource(
const MergeTreeData & storage,
const StorageSnapshotPtr & storage_snapshot,
MergeTreeData::DataPartPtr data_part,
AlterConversionsPtr alter_conversions,
Names columns_to_read,
std::optional<MarkRanges> mark_ranges,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count,
@ -315,7 +320,8 @@ Pipe createMergeTreeSequentialSource(
columns_to_read.emplace_back(RowExistsColumn::name);

auto column_part_source = std::make_shared<MergeTreeSequentialSource>(type,
storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges),
storage, storage_snapshot, data_part, alter_conversions,
columns_to_read, std::move(mark_ranges),
/*apply_deleted_mask=*/ false, read_with_direct_io, prefetch);

Pipe pipe(std::move(column_part_source));
@ -346,6 +352,7 @@ public:
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
MergeTreeData::DataPartPtr data_part_,
AlterConversionsPtr alter_conversions_,
Names columns_to_read_,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count_,
bool apply_deleted_mask_,
@ -359,6 +366,7 @@ public:
, storage(storage_)
, storage_snapshot(storage_snapshot_)
, data_part(std::move(data_part_))
, alter_conversions(std::move(alter_conversions_))
, columns_to_read(std::move(columns_to_read_))
, filtered_rows_count(std::move(filtered_rows_count_))
, apply_deleted_mask(apply_deleted_mask_)
@ -405,6 +413,7 @@ public:
storage,
storage_snapshot,
data_part,
alter_conversions,
columns_to_read,
std::move(mark_ranges),
filtered_rows_count,
@ -420,6 +429,7 @@ private:
const MergeTreeData & storage;
const StorageSnapshotPtr storage_snapshot;
const MergeTreeData::DataPartPtr data_part;
const AlterConversionsPtr alter_conversions;
const Names columns_to_read;
const std::shared_ptr<std::atomic<size_t>> filtered_rows_count;
const bool apply_deleted_mask;
@ -436,6 +446,7 @@ void createReadFromPartStep(
const MergeTreeData & storage,
const StorageSnapshotPtr & storage_snapshot,
MergeTreeData::DataPartPtr data_part,
AlterConversionsPtr alter_conversions,
Names columns_to_read,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count,
bool apply_deleted_mask,
@ -450,6 +461,7 @@ void createReadFromPartStep(
storage,
storage_snapshot,
std::move(data_part),
std::move(alter_conversions),
std::move(columns_to_read),
filtered_rows_count,
apply_deleted_mask,

@ -21,6 +21,7 @@ Pipe createMergeTreeSequentialSource(
const MergeTreeData & storage,
const StorageSnapshotPtr & storage_snapshot,
MergeTreeData::DataPartPtr data_part,
AlterConversionsPtr alter_conversions,
Names columns_to_read,
std::optional<MarkRanges> mark_ranges,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count,
@ -36,6 +37,7 @@ void createReadFromPartStep(
const MergeTreeData & storage,
const StorageSnapshotPtr & storage_snapshot,
MergeTreeData::DataPartPtr data_part,
AlterConversionsPtr alter_conversions,
Names columns_to_read,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count,
bool apply_deleted_mask,
@ -115,6 +115,7 @@ static UInt64 getExistingRowsCount(const Block & block)
static void splitAndModifyMutationCommands(
MergeTreeData::DataPartPtr part,
StorageMetadataPtr metadata_snapshot,
AlterConversionsPtr alter_conversions,
const MutationCommands & commands,
MutationCommands & for_interpreter,
MutationCommands & for_file_renames,
@ -180,8 +181,6 @@ static void splitAndModifyMutationCommands(

}

auto alter_conversions = part->storage.getAlterConversionsForPart(part);

/// We don't add renames from commands, instead we take them from rename_map.
/// It's important because required renames depend not only on part's data version (i.e. mutation version)
/// but also on part's metadata version. Why we have such logic only for renames? Because all other types of alter
@ -297,7 +296,6 @@ static void splitAndModifyMutationCommands(
}
}

auto alter_conversions = part->storage.getAlterConversionsForPart(part);
/// We don't add renames from commands, instead we take them from rename_map.
/// It's important because required renames depend not only on part's data version (i.e. mutation version)
/// but also on part's metadata version. Why we have such logic only for renames? Because all other types of alter
@ -2042,6 +2040,15 @@ bool MutateTask::prepare()

ctx->num_mutations = std::make_unique<CurrentMetrics::Increment>(CurrentMetrics::PartMutation);

MergeTreeData::IMutationsSnapshot::Params params
{
.metadata_version = ctx->metadata_snapshot->getMetadataVersion(),
.min_part_metadata_version = ctx->source_part->getMetadataVersion(),
};

auto mutations_snapshot = ctx->data->getMutationsSnapshot(params);
auto alter_conversions = MergeTreeData::getAlterConversionsForPart(ctx->source_part, mutations_snapshot, ctx->metadata_snapshot, ctx->context);

auto context_for_reading = Context::createCopy(ctx->context);

/// Allow mutations to work when force_index_by_date or force_primary_key is on.
@ -2056,7 +2063,7 @@ bool MutateTask::prepare()
ctx->commands_for_part.emplace_back(command);

if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations(
ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading))
ctx->source_part, mutations_snapshot, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading))
{
NameSet files_to_copy_instead_of_hardlinks;
auto settings_ptr = ctx->data->getSettings();
@ -2116,8 +2123,13 @@ bool MutateTask::prepare()
context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1);

MutationHelpers::splitAndModifyMutationCommands(
ctx->source_part, ctx->metadata_snapshot,
ctx->commands_for_part, ctx->for_interpreter, ctx->for_file_renames, ctx->log);
ctx->source_part,
ctx->metadata_snapshot,
alter_conversions,
ctx->commands_for_part,
ctx->for_interpreter,
ctx->for_file_renames,
ctx->log);

ctx->stage_progress = std::make_unique<MergeStageProgress>(1.0);

@ -2131,7 +2143,8 @@ bool MutateTask::prepare()
settings.apply_deleted_mask = false;

ctx->interpreter = std::make_unique<MutationsInterpreter>(
*ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->for_interpreter,
*ctx->data, ctx->source_part, alter_conversions,
ctx->metadata_snapshot, ctx->for_interpreter,
ctx->metadata_snapshot->getColumns().getNamesOfPhysical(), context_for_reading, settings);

ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices();
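For the MutateTask hunks, note that the snapshot parameters differ from the merge case: a mutation of a single part uses that part's own metadata version as the lower bound instead of the minimum over many parts. A small sketch of just that parameter choice (field names follow the diff; the struct itself is a simplification, not the real IMutationsSnapshot::Params):

#include <cassert>
#include <cstdint>

struct MutationsSnapshotParams
{
    int64_t metadata_version = -1;
    int64_t min_part_metadata_version = -1;
};

/// Merge: lower bound is the minimum metadata version across all source parts.
/// Mutation: lower bound is the single source part's metadata version.
MutationsSnapshotParams paramsForMutation(int64_t table_metadata_version, int64_t source_part_metadata_version)
{
    return {table_metadata_version, source_part_metadata_version};
}

int main()
{
    auto params = paramsForMutation(/*table*/ 7, /*part*/ 5);
    assert(params.metadata_version == 7);
    assert(params.min_part_metadata_version == 5);
    return 0;
}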
@ -42,7 +42,6 @@ struct RangesInDataPartsDescription: public std::deque<RangesInDataPartDescripti
struct RangesInDataPart
{
DataPartPtr data_part;
AlterConversionsPtr alter_conversions;
size_t part_index_in_query;
MarkRanges ranges;
MarkRanges exact_ranges;
@ -51,14 +50,13 @@ struct RangesInDataPart

RangesInDataPart(
const DataPartPtr & data_part_,
const AlterConversionsPtr & alter_conversions_,
const size_t part_index_in_query_,
const MarkRanges & ranges_ = MarkRanges{})
: data_part{data_part_}
, alter_conversions{alter_conversions_}
, part_index_in_query{part_index_in_query_}
, ranges{ranges_}
{}
{
}

RangesInDataPartDescription getDescription() const;