mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Merge branch 'master' into faster_replicated_ddl
This commit is contained in:
commit
d9e3e9b69f
@ -144,6 +144,7 @@ Checks: '-*,
|
||||
clang-analyzer-cplusplus.SelfAssignment,
|
||||
clang-analyzer-deadcode.DeadStores,
|
||||
clang-analyzer-cplusplus.Move,
|
||||
clang-analyzer-optin.cplusplus.UninitializedObject,
|
||||
clang-analyzer-optin.cplusplus.VirtualCall,
|
||||
clang-analyzer-security.insecureAPI.UncheckedReturn,
|
||||
clang-analyzer-security.insecureAPI.bcmp,
|
||||
|
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -1,4 +1,4 @@
|
||||
Changelog category (leave one):
|
||||
### Changelog category (leave one):
|
||||
- New Feature
|
||||
- Improvement
|
||||
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
|
||||
@ -9,7 +9,7 @@ Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
|
||||
|
||||
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
|
||||
### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
|
||||
...
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
set (ENABLE_KRB5_DEFAULT 1)
|
||||
set (ENABLE_KRB5_DEFAULT ${ENABLE_LIBRARIES})
|
||||
if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_CROSSCOMPILING))
|
||||
message (WARNING "krb5 disabled in non-Linux and non-native-Darwin environments")
|
||||
set (ENABLE_KRB5_DEFAULT 0)
|
||||
|
2
contrib/poco
vendored
2
contrib/poco
vendored
@ -1 +1 @@
|
||||
Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1
|
||||
Subproject commit 008b16469471d55b176db181756c94e3f14dd2dc
|
2
contrib/unixodbc
vendored
2
contrib/unixodbc
vendored
@ -1 +1 @@
|
||||
Subproject commit b0ad30f7f6289c12b76f04bfb9d466374bb32168
|
||||
Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd
|
@ -20,6 +20,8 @@ ENV LANG=en_US.UTF-8 \
|
||||
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
|
||||
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
|
||||
ARG TARGETARCH
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
|
||||
|
@ -94,8 +94,9 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
&& apt-get update \
|
||||
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
|
||||
&& for package in ${PACKAGES}; do \
|
||||
apt-get install --allow-unauthenticated --yes --no-install-recommends "${package}=${VERSION}" || exit 1 \
|
||||
packages="${packages} ${package}=${VERSION}" \
|
||||
; done \
|
||||
&& apt-get install --allow-unauthenticated --yes --no-install-recommends ${packages} || exit 1 \
|
||||
; fi \
|
||||
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
|
||||
&& rm -rf \
|
||||
|
@ -77,7 +77,7 @@ A function configuration contains the following settings:
|
||||
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
|
||||
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
|
||||
- `return_type` - the type of a returned value.
|
||||
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
|
||||
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
|
||||
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
|
||||
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
|
||||
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
|
||||
|
@ -810,7 +810,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
|
||||
|
||||
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
|
||||
("compression", po::value<bool>(), "enable or disable compression")
|
||||
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
|
||||
|
||||
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
|
||||
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
|
||||
|
@ -49,6 +49,18 @@ if (COMPILER_GCC)
|
||||
add_definitions ("-fno-tree-loop-distribute-patterns")
|
||||
endif ()
|
||||
|
||||
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#if ENABLE_MULTITARGET_CODE`).
|
||||
# If turned ON, this option defines such macro.
|
||||
# See `src/Common/TargetSpecific.h`
|
||||
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
|
||||
|
||||
if (ENABLE_MULTITARGET_CODE)
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=1)
|
||||
else()
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=0)
|
||||
endif()
|
||||
|
||||
|
||||
add_subdirectory (Access)
|
||||
add_subdirectory (Backups)
|
||||
add_subdirectory (Columns)
|
||||
|
@ -125,7 +125,7 @@ class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBas
|
||||
|
||||
public:
|
||||
FindResultImpl()
|
||||
: FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0)
|
||||
: FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0) // NOLINT(clang-analyzer-optin.cplusplus.UninitializedObject) intentionally allow uninitialized value here
|
||||
{}
|
||||
|
||||
FindResultImpl(Mapped * value_, bool found_, size_t off)
|
||||
|
@ -214,6 +214,9 @@ private:
|
||||
|
||||
/// Offset in bits, within its byte, of the position just past the rightmost content bit; zero if the rightmost content bit is the last bit of that byte.
|
||||
offset_r = (l + content_width) % 8;
|
||||
|
||||
content_l = nullptr;
|
||||
content_r = nullptr;
|
||||
}
|
||||
|
||||
UInt8 ALWAYS_INLINE read(UInt8 value_l) const
|
||||
|
@ -61,7 +61,7 @@ private:
|
||||
class JSONBool : public IItem
|
||||
{
|
||||
public:
|
||||
explicit JSONBool(bool value_) : value(std::move(value_)) {}
|
||||
explicit JSONBool(bool value_) : value(value_) {}
|
||||
void format(const FormatSettings & settings, FormatContext & context) override;
|
||||
|
||||
private:
|
||||
@ -74,7 +74,7 @@ public:
|
||||
void add(ItemPtr value) { values.push_back(std::move(value)); }
|
||||
void add(std::string value) { add(std::make_unique<JSONString>(std::move(value))); }
|
||||
void add(const char * value) { add(std::make_unique<JSONString>(value)); }
|
||||
void add(bool value) { add(std::make_unique<JSONBool>(std::move(value))); }
|
||||
void add(bool value) { add(std::make_unique<JSONBool>(value)); }
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
@ -99,7 +99,7 @@ public:
|
||||
void add(std::string key, std::string value) { add(std::move(key), std::make_unique<JSONString>(std::move(value))); }
|
||||
void add(std::string key, const char * value) { add(std::move(key), std::make_unique<JSONString>(value)); }
|
||||
void add(std::string key, std::string_view value) { add(std::move(key), std::make_unique<JSONString>(value)); }
|
||||
void add(std::string key, bool value) { add(std::move(key), std::make_unique<JSONBool>(std::move(value))); }
|
||||
void add(std::string key, bool value) { add(std::move(key), std::make_unique<JSONBool>(value)); }
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
|
15
src/Common/NamePrompter.cpp
Normal file
15
src/Common/NamePrompter.cpp
Normal file
@ -0,0 +1,15 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/NamePrompter.h>
|
||||
|
||||
namespace DB::detail
|
||||
{
|
||||
/// Appends ". Maybe you meant: " followed by the stringified `hints` to `message`.
/// Leaves `message` unchanged when `hints` is empty.
void appendHintsMessageImpl(String & message, const std::vector<String> & hints)
|
||||
{
|
||||
if (hints.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
message += ". Maybe you meant: " + toString(hints);
|
||||
}
|
||||
}
|
@ -90,6 +90,10 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
void appendHintsMessageImpl(String & message, const std::vector<String> & hints);
|
||||
}
|
||||
|
||||
template <size_t MaxNumHints, typename Self>
|
||||
class IHints
|
||||
@ -102,6 +106,12 @@ public:
|
||||
return prompter.getHints(name, getAllRegisteredNames());
|
||||
}
|
||||
|
||||
void appendHintsMessage(String & message, const String & name) const
|
||||
{
|
||||
auto hints = getHints(name);
|
||||
detail::appendHintsMessageImpl(message, hints);
|
||||
}
|
||||
|
||||
IHints() = default;
|
||||
|
||||
IHints(const IHints &) = default;
|
||||
@ -114,5 +124,4 @@ public:
|
||||
private:
|
||||
NamePrompter<MaxNumHints> prompter;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
|
||||
#include <Common/CpuId.h>
|
||||
|
@ -46,7 +46,8 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con
|
||||
return onError<ReturnType>("Block structure mismatch in " + std::string(context_description) + " stream: different names of columns:\n"
|
||||
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
|
||||
|
||||
if (!actual.type->equals(*expected.type))
|
||||
if ((actual.type && !expected.type) || (!actual.type && expected.type)
|
||||
|| (actual.type && expected.type && !actual.type->equals(*expected.type)))
|
||||
return onError<ReturnType>("Block structure mismatch in " + std::string(context_description) + " stream: different types:\n"
|
||||
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
|
||||
|
||||
|
@ -15,10 +15,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/** Cursor allows to compare rows in different blocks (and parts).
|
||||
* Cursor moves inside single block.
|
||||
@ -61,25 +57,21 @@ struct SortCursorImpl
|
||||
reset(block, perm);
|
||||
}
|
||||
|
||||
SortCursorImpl(const Columns & columns, const SortDescription & desc_, size_t order_ = 0, IColumn::Permutation * perm = nullptr)
|
||||
SortCursorImpl(
|
||||
const Block & header,
|
||||
const Columns & columns,
|
||||
const SortDescription & desc_,
|
||||
size_t order_ = 0,
|
||||
IColumn::Permutation * perm = nullptr)
|
||||
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
|
||||
{
|
||||
for (auto & column_desc : desc)
|
||||
{
|
||||
if (!column_desc.column_name.empty())
|
||||
throw Exception("SortDescription should contain column position if SortCursor was used without header.",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
reset(columns, {}, perm);
|
||||
reset(columns, header, perm);
|
||||
}
|
||||
|
||||
bool empty() const { return rows == 0; }
|
||||
|
||||
/// Set the cursor to the beginning of the new block.
|
||||
void reset(const Block & block, IColumn::Permutation * perm = nullptr)
|
||||
{
|
||||
reset(block.getColumns(), block, perm);
|
||||
}
|
||||
void reset(const Block & block, IColumn::Permutation * perm = nullptr) { reset(block.getColumns(), block, perm); }
|
||||
|
||||
/// Set the cursor to the beginning of the new block.
|
||||
void reset(const Columns & columns, const Block & block, IColumn::Permutation * perm = nullptr)
|
||||
@ -95,9 +87,7 @@ struct SortCursorImpl
|
||||
for (size_t j = 0, size = desc.size(); j < size; ++j)
|
||||
{
|
||||
auto & column_desc = desc[j];
|
||||
size_t column_number = !column_desc.column_name.empty()
|
||||
? block.getPositionByName(column_desc.column_name)
|
||||
: column_desc.column_number;
|
||||
size_t column_number = block.getPositionByName(column_desc.column_name);
|
||||
sort_columns.push_back(columns[column_number].get());
|
||||
|
||||
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported();
|
||||
@ -367,12 +357,12 @@ private:
|
||||
};
|
||||
|
||||
template <typename TLeftColumns, typename TRightColumns>
|
||||
bool less(const TLeftColumns & lhs, const TRightColumns & rhs, size_t i, size_t j, const SortDescription & descr)
|
||||
bool less(const TLeftColumns & lhs, const TRightColumns & rhs, size_t i, size_t j, const SortDescriptionWithPositions & descr)
|
||||
{
|
||||
for (const auto & elem : descr)
|
||||
{
|
||||
size_t ind = elem.column_number;
|
||||
int res = elem.direction * lhs[ind]->compareAt(i, j, *rhs[ind], elem.nulls_direction);
|
||||
int res = elem.base.direction * lhs[ind]->compareAt(i, j, *rhs[ind], elem.base.nulls_direction);
|
||||
if (res < 0)
|
||||
return true;
|
||||
else if (res > 0)
|
||||
|
@ -1,12 +1,12 @@
|
||||
#include <Core/SortDescription.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/SortDescription.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/JSONBuilder.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out)
|
||||
void dumpSortDescription(const SortDescription & description, WriteBuffer & out)
|
||||
{
|
||||
bool first = true;
|
||||
|
||||
@ -16,17 +16,7 @@ void dumpSortDescription(const SortDescription & description, const Block & head
|
||||
out << ", ";
|
||||
first = false;
|
||||
|
||||
if (!desc.column_name.empty())
|
||||
out << desc.column_name;
|
||||
else
|
||||
{
|
||||
if (desc.column_number < header.columns())
|
||||
out << header.getByPosition(desc.column_number).name;
|
||||
else
|
||||
out << "?";
|
||||
|
||||
out << " (pos " << desc.column_number << ")";
|
||||
}
|
||||
out << desc.column_name;
|
||||
|
||||
if (desc.direction > 0)
|
||||
out << " ASC";
|
||||
@ -38,18 +28,9 @@ void dumpSortDescription(const SortDescription & description, const Block & head
|
||||
}
|
||||
}
|
||||
|
||||
void SortColumnDescription::explain(JSONBuilder::JSONMap & map, const Block & header) const
|
||||
void SortColumnDescription::explain(JSONBuilder::JSONMap & map) const
|
||||
{
|
||||
if (!column_name.empty())
|
||||
map.add("Column", column_name);
|
||||
else
|
||||
{
|
||||
if (column_number < header.columns())
|
||||
map.add("Column", header.getByPosition(column_number).name);
|
||||
|
||||
map.add("Position", column_number);
|
||||
}
|
||||
|
||||
map.add("Column", column_name);
|
||||
map.add("Ascending", direction > 0);
|
||||
map.add("With Fill", with_fill);
|
||||
}
|
||||
@ -57,17 +38,17 @@ void SortColumnDescription::explain(JSONBuilder::JSONMap & map, const Block & he
|
||||
std::string dumpSortDescription(const SortDescription & description)
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
dumpSortDescription(description, Block{}, wb);
|
||||
dumpSortDescription(description, wb);
|
||||
return wb.str();
|
||||
}
|
||||
|
||||
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description, const Block & header)
|
||||
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description)
|
||||
{
|
||||
auto json_array = std::make_unique<JSONBuilder::JSONArray>();
|
||||
for (const auto & descr : description)
|
||||
{
|
||||
auto json_map = std::make_unique<JSONBuilder::JSONMap>();
|
||||
descr.explain(*json_map, header);
|
||||
descr.explain(*json_map);
|
||||
json_array->add(std::move(json_map));
|
||||
}
|
||||
|
||||
|
@ -39,7 +39,6 @@ struct FillColumnDescription
|
||||
struct SortColumnDescription
|
||||
{
|
||||
std::string column_name; /// The name of the column.
|
||||
size_t column_number; /// Column number (used if no name is given).
|
||||
int direction; /// 1 - ascending, -1 - descending.
|
||||
int nulls_direction; /// 1 - NULLs and NaNs are greater, -1 - less.
|
||||
/// To achieve NULLS LAST, set it equal to direction, to achieve NULLS FIRST, set it opposite.
|
||||
@ -48,23 +47,24 @@ struct SortColumnDescription
|
||||
FillColumnDescription fill_description;
|
||||
|
||||
explicit SortColumnDescription(
|
||||
size_t column_number_, int direction_ = 1, int nulls_direction_ = 1,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
|
||||
, with_fill(with_fill_), fill_description(fill_description_) {}
|
||||
|
||||
explicit SortColumnDescription(
|
||||
const std::string & column_name_, int direction_ = 1, int nulls_direction_ = 1,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
|
||||
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {}
|
||||
const std::string & column_name_,
|
||||
int direction_ = 1,
|
||||
int nulls_direction_ = 1,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false,
|
||||
const FillColumnDescription & fill_description_ = {})
|
||||
: column_name(column_name_)
|
||||
, direction(direction_)
|
||||
, nulls_direction(nulls_direction_)
|
||||
, collator(collator_)
|
||||
, with_fill(with_fill_)
|
||||
, fill_description(fill_description_)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator == (const SortColumnDescription & other) const
|
||||
{
|
||||
return column_name == other.column_name && column_number == other.column_number
|
||||
&& direction == other.direction && nulls_direction == other.nulls_direction;
|
||||
return column_name == other.column_name && direction == other.direction && nulls_direction == other.nulls_direction;
|
||||
}
|
||||
|
||||
bool operator != (const SortColumnDescription & other) const
|
||||
@ -72,22 +72,30 @@ struct SortColumnDescription
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
std::string dump() const
|
||||
{
|
||||
return fmt::format("{}:{}:dir {}nulls ", column_name, column_number, direction, nulls_direction);
|
||||
}
|
||||
std::string dump() const { return fmt::format("{}:dir {}nulls {}", column_name, direction, nulls_direction); }
|
||||
|
||||
void explain(JSONBuilder::JSONMap & map, const Block & header) const;
|
||||
void explain(JSONBuilder::JSONMap & map) const;
|
||||
};
|
||||
|
||||
/// A sort column description (`base`) stored together with a column number.
/// Elements of `SortDescriptionWithPositions` are of this type.
struct SortColumnDescriptionWithColumnIndex
|
||||
{
|
||||
SortColumnDescription base; /// The sort column description itself (column name, direction, nulls direction, ...).
|
||||
size_t column_number; /// Column number stored alongside the description.
|
||||
|
||||
/// Takes the description by value and moves it into `base`.
SortColumnDescriptionWithColumnIndex(SortColumnDescription description_, size_t column_number_)
|
||||
: base(std::move(description_)), column_number(column_number_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/// Description of the sorting rule for several columns.
|
||||
using SortDescription = std::vector<SortColumnDescription>;
|
||||
using SortDescriptionWithPositions = std::vector<SortColumnDescriptionWithColumnIndex>;
|
||||
|
||||
/// Outputs user-readable description into `out`.
|
||||
void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out);
|
||||
void dumpSortDescription(const SortDescription & description, WriteBuffer & out);
|
||||
|
||||
std::string dumpSortDescription(const SortDescription & description);
|
||||
|
||||
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description, const Block & header);
|
||||
|
||||
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description);
|
||||
}
|
||||
|
@ -128,22 +128,21 @@ static auto extractVector(const std::vector<Tuple> & vec)
|
||||
return res;
|
||||
}
|
||||
|
||||
void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, const NamesAndTypesList & extended_storage_columns)
|
||||
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns)
|
||||
{
|
||||
std::unordered_map<String, DataTypePtr> storage_columns_map;
|
||||
for (const auto & [name, type] : extended_storage_columns)
|
||||
storage_columns_map[name] = type;
|
||||
|
||||
for (auto & name_type : columns_list)
|
||||
for (auto & column : block)
|
||||
{
|
||||
if (!isObject(name_type.type))
|
||||
if (!isObject(column.type))
|
||||
continue;
|
||||
|
||||
auto & column = block.getByName(name_type.name);
|
||||
if (!isObject(column.type))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Type for column '{}' mismatch in columns list and in block. In list: {}, in block: {}",
|
||||
name_type.name, name_type.type->getName(), column.type->getName());
|
||||
column.name, column.type->getName(), column.type->getName());
|
||||
|
||||
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
|
||||
const auto & subcolumns = column_object.getSubcolumns();
|
||||
@ -151,7 +150,7 @@ void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, con
|
||||
if (!column_object.isFinalized())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot convert to tuple column '{}' from type {}. Column should be finalized first",
|
||||
name_type.name, name_type.type->getName());
|
||||
column.name, column.type->getName());
|
||||
|
||||
PathsInData tuple_paths;
|
||||
DataTypes tuple_types;
|
||||
@ -164,12 +163,11 @@ void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, con
|
||||
tuple_columns.emplace_back(entry->data.getFinalizedColumnPtr());
|
||||
}
|
||||
|
||||
auto it = storage_columns_map.find(name_type.name);
|
||||
auto it = storage_columns_map.find(column.name);
|
||||
if (it == storage_columns_map.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", name_type.name);
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
|
||||
|
||||
std::tie(column.column, column.type) = unflattenTuple(tuple_paths, tuple_types, tuple_columns);
|
||||
name_type.type = column.type;
|
||||
|
||||
/// Check that constructed Tuple type and type in storage are compatible.
|
||||
getLeastCommonTypeForObject({column.type, it->second}, true);
|
||||
|
@ -38,7 +38,7 @@ DataTypePtr getDataTypeByColumn(const IColumn & column);
|
||||
|
||||
/// Converts Object types and columns to Tuples in @block
|
||||
/// and checks that types are consistent with types in @extended_storage_columns.
|
||||
void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, const NamesAndTypesList & extended_storage_columns);
|
||||
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns);
|
||||
|
||||
/// Checks that each path is not the prefix of any other path.
|
||||
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
|
||||
|
@ -334,15 +334,17 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
|
||||
read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET);
|
||||
}
|
||||
|
||||
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
|
||||
auto download_offset = file_segment->getDownloadOffset();
|
||||
if (download_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
|
||||
{
|
||||
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, "
|
||||
"implementation buffer reading until: {}, file segment info: {}",
|
||||
file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(),
|
||||
impl_range.left, *impl_range.right, file_segment->getInfoForLog());
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
@ -802,12 +804,14 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()
|
||||
|
||||
String CachedReadBufferFromRemoteFS::getInfoForLog()
|
||||
{
|
||||
auto implementation_buffer_read_range_str =
|
||||
implementation_buffer ?
|
||||
std::to_string(implementation_buffer->getRemainingReadRange().left)
|
||||
+ '-'
|
||||
+ (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
|
||||
: "None";
|
||||
String implementation_buffer_read_range_str;
|
||||
if (implementation_buffer)
|
||||
{
|
||||
auto read_range = implementation_buffer->getRemainingReadRange();
|
||||
implementation_buffer_read_range_str = std::to_string(read_range.left) + '-' + (read_range.right ? std::to_string(*read_range.right) : "None");
|
||||
}
|
||||
else
|
||||
implementation_buffer_read_range_str = "None";
|
||||
|
||||
auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog();
|
||||
|
||||
|
@ -96,17 +96,6 @@ if (TARGET ch_contrib::rapidjson)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
|
||||
endif()
|
||||
|
||||
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
|
||||
# If turned ON, this option defines such macro.
|
||||
# See `src/Functions/TargetSpecific.h`
|
||||
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
|
||||
|
||||
if (ENABLE_MULTITARGET_CODE)
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=1)
|
||||
else()
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=0)
|
||||
endif()
|
||||
|
||||
add_subdirectory(GatherUtils)
|
||||
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
#pragma once
|
||||
#include <base/map.h>
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/GatherUtils/GatherUtils.h>
|
||||
#include <Functions/GatherUtils/Sources.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
|
@ -38,8 +38,8 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/range.h>
|
||||
#include <base/bit_cast.h>
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include <base/range.h>
|
||||
#include "GatherUtils.h"
|
||||
#include "sliceEqualElements.h"
|
||||
#include "sliceHasImplAnyAll.h"
|
||||
|
||||
|
||||
namespace DB::ErrorCodes
|
||||
@ -461,39 +463,19 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con
|
||||
}
|
||||
|
||||
|
||||
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
|
||||
template <
|
||||
ArraySearchType search_type,
|
||||
typename FirstSliceType,
|
||||
typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
template <typename T>
|
||||
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
for (size_t i = 0; i < second.size; ++i)
|
||||
{
|
||||
bool has = false;
|
||||
for (size_t j = 0; j < first.size && !has; ++j)
|
||||
{
|
||||
const bool is_first_null = has_first_null_map && first_null_map[j];
|
||||
const bool is_second_null = has_second_null_map && second_null_map[i];
|
||||
|
||||
if (is_first_null && is_second_null)
|
||||
has = true;
|
||||
|
||||
if (!is_first_null && !is_second_null && isEqual(first, second, j, i))
|
||||
has = true;
|
||||
}
|
||||
|
||||
if (has && search_type == ArraySearchType::Any)
|
||||
return true;
|
||||
|
||||
if (!has && search_type == ArraySearchType::All)
|
||||
return false;
|
||||
}
|
||||
return search_type == ArraySearchType::All;
|
||||
if constexpr (is_decimal<T>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
|
||||
}
|
||||
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
|
||||
{
|
||||
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
|
||||
}
|
||||
|
||||
template <
|
||||
@ -620,55 +602,6 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
|
||||
return sliceHasImplAnyAll<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
|
||||
template <typename T, typename U>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
const NumericArraySlice<U> & second [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
/// TODO: Decimal scale
|
||||
if constexpr (is_decimal<T> && is_decimal<U>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
|
||||
else if constexpr (is_decimal<T> || is_decimal<U>)
|
||||
return false;
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compares an element of one generic slice with an element of another.
/// Each index is relative to its slice and is shifted by that slice's `begin`.
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
    const size_t lhs_row = first_ind + first.begin;
    const size_t rhs_row = second_ind + second.begin;
    const auto order = first.elements->compareAt(lhs_row, rhs_row, *second.elements, -1);
    return order == 0;
}
|
||||
|
||||
template <typename T>
|
||||
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
if constexpr (is_decimal<T>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
|
||||
}
|
||||
|
||||
/// Compares two elements that live in the same generic slice.
/// Both indices are relative to the slice and are shifted by `first.begin` before the column is consulted.
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
    const auto & column = *first.elements;
    const size_t lhs_row = first_ind + first.begin;
    const size_t rhs_row = second_ind + first.begin;
    return column.compareAt(lhs_row, rhs_row, column, -1) == 0;
}
|
||||
|
||||
template <ArraySearchType search_type, typename T, typename U>
|
||||
bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
|
||||
{
|
||||
@ -854,4 +787,3 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||
|
||||
add_headers_and_sources(clickhouse_functions_gatherutils .)
|
||||
add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers})
|
||||
target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms)
|
||||
@ -14,3 +15,5 @@ endif()
|
||||
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
|
||||
target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
|
||||
endif()
|
||||
|
||||
set_target_properties(clickhouse_functions_gatherutils PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")
|
||||
|
41
src/Functions/GatherUtils/sliceEqualElements.h
Normal file
41
src/Functions/GatherUtils/sliceEqualElements.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include "Slices.h"
|
||||
|
||||
namespace DB::GatherUtils
|
||||
{
|
||||
|
||||
template <typename T, typename U>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
const NumericArraySlice<U> & second [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
/// TODO: Decimal scale
|
||||
if constexpr (is_decimal<T> && is_decimal<U>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
|
||||
else if constexpr (is_decimal<T> || is_decimal<U>)
|
||||
return false;
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compares an element of one generic slice with an element of another.
/// Each index is relative to its slice and is shifted by that slice's `begin`.
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
    const size_t lhs_row = first_ind + first.begin;
    const size_t rhs_row = second_ind + second.begin;
    const auto order = first.elements->compareAt(lhs_row, rhs_row, *second.elements, -1);
    return order == 0;
}
|
||||
|
||||
}
|
943
src/Functions/GatherUtils/sliceHasImplAnyAll.h
Normal file
943
src/Functions/GatherUtils/sliceHasImplAnyAll.h
Normal file
@ -0,0 +1,943 @@
|
||||
#pragma once
|
||||
|
||||
#include "GatherUtils.h"
|
||||
#include "Slices.h"
|
||||
#include "sliceEqualElements.h"
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
|
||||
|
||||
namespace DB::GatherUtils
|
||||
{
|
||||
|
||||
/// Returns true if any of the first `null_map_size` bytes of `null_map` is non-zero.
/// A missing map (nullptr) is treated as "no nulls".
inline ALWAYS_INLINE bool hasNull(const UInt8 * null_map, size_t null_map_size)
{
    if (!null_map)
        return false;

    const UInt8 * const map_end = null_map + null_map_size;
    for (const UInt8 * flag = null_map; flag != map_end; ++flag)
        if (*flag)
            return true;

    return false;
}
|
||||
|
||||
template<class T>
|
||||
inline ALWAYS_INLINE bool hasAllIntegralLoopRemainder(
|
||||
size_t j, const NumericArraySlice<T> & first, const NumericArraySlice<T> & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
for (; j < second.size; ++j)
|
||||
{
|
||||
// skip null elements since both have at least one - assuming it was checked earlier that at least one element in 'first' is null
|
||||
if (has_second_null_map && second_null_map[j])
|
||||
continue;
|
||||
|
||||
bool found = false;
|
||||
|
||||
for (size_t i = 0; i < first.size; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
if (first.data[i] == second.data[j])
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
DECLARE_AVX2_SPECIFIC_CODE (
|
||||
|
||||
// AVX2 Int64, UInt64 specialization.
// Returns true when every element of 'second' is matched by an element of 'first'.
// Either null map may be nullptr, meaning that side has no null flags.
// 'second' is consumed 4 elements at a time; whatever is left is handed to hasAllIntegralLoopRemainder.
// NOTE(review): the null-map loads below use _mm_loadu_si128 (16 bytes) although the widening
// conversion consumes only the lowest bytes - presumably this relies on the buffers being padded; confirm.
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int64> || std::is_same_v<IntType, UInt64>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
// Nothing to look for.
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
// 'second' has a null flag set but 'first' has none: report "not contained".
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
// has_mask is 1 while every chunk of 'second' processed so far was fully matched.
int has_mask = 1;
|
||||
static constexpr Int64 full = -1, none = 0;
|
||||
const __m256i ones = _mm256_set1_epi64x(full);
|
||||
const __m256i zeros = _mm256_setzero_si256();
|
||||
|
||||
// The vectorized path is only taken when both slices hold at least one full 4-element chunk.
if (second.size > 3 && first.size > 3)
|
||||
{
|
||||
for (; j < second.size - 3 && has_mask; j += 4)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
|
||||
// bits of the bitmask are set to one if considered as null in the corresponding null map, 0 otherwise;
|
||||
__m256i bitmask = has_second_null_map ?
|
||||
_mm256_set_epi64x(
|
||||
(second_null_map[j + 3])? full : none,
|
||||
(second_null_map[j + 2])? full : none,
|
||||
(second_null_map[j + 1])? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
// has_mask is refreshed in the increment expression: it becomes 1 once every bit of bitmask is set.
for (; i < first.size - 3 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 4)
|
||||
{
|
||||
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(first.data + i));
|
||||
// Null-map bytes of 'first' widened to 64-bit lanes (sign extension).
// NOTE(review): a flag byte of 1 widens to the value 1, so the and-not below clears only the lowest bit of a lane - confirm this is intended.
const __m256i first_nm_mask = has_first_null_map?
|
||||
_mm256_set_m128i(
|
||||
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 2))),
|
||||
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
|
||||
: zeros;
|
||||
// Compare second_data with first_data and with first_data rotated by 1, 2 and 3 64-bit lanes
// (the permutation indices address 32-bit halves), and OR all results into bitmask.
bitmask =
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
first_nm_mask,
|
||||
_mm256_cmpeq_epi64(second_data, first_data)),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(5,4,3,2,1,0,7,6)),
|
||||
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(5,4,3,2,1,0,7,6))))),
|
||||
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(3,2,1,0,7,6,5,4)),
|
||||
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(3,2,1,0,7,6,5,4)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(1,0,7,6,5,4,3,2)),
|
||||
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(1,0,7,6,5,4,3,2)))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
// Tail of 'first' (fewer than 4 elements left): broadcast each non-null element and compare.
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m256i v_i = _mm256_set1_epi64x(first.data[i]);
|
||||
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi64(second_data, v_i));
|
||||
has_mask = _mm256_testc_si256(bitmask, ones);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// has_mask == 0 here means the last vector chunk of 'second' was not fully matched.
if (!has_mask && second.size > 3)
|
||||
return false;
|
||||
|
||||
// Remaining elements of 'second' (from position j) are checked one by one.
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// AVX2 Int32, UInt32 specialization.
// Returns true when every element of 'second' is matched by an element of 'first'.
// Either null map may be nullptr, meaning that side has no null flags.
// 'second' is consumed 8 elements at a time; whatever is left is handed to hasAllIntegralLoopRemainder.
// NOTE(review): the null-map loads below use _mm_loadu_si128 (16 bytes) although the widening
// conversion consumes only the lowest bytes - presumably this relies on the buffers being padded; confirm.
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
// Nothing to look for.
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
// 'second' has a null flag set but 'first' has none: report "not contained".
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
// has_mask is 1 while every chunk of 'second' processed so far was fully matched.
int has_mask = 1;
|
||||
static constexpr int full = -1, none = 0;
|
||||
|
||||
const __m256i ones = _mm256_set1_epi32(full);
|
||||
const __m256i zeros = _mm256_setzero_si256();
|
||||
|
||||
// The vectorized path is only taken when both slices hold at least one full 8-element chunk.
if (second.size > 7 && first.size > 7)
|
||||
{
|
||||
for (; j < second.size - 7 && has_mask; j += 8)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
|
||||
// bits of the bitmask are set to one if considered as null in the corresponding null map, 0 otherwise;
|
||||
__m256i bitmask = has_second_null_map ?
|
||||
_mm256_set_epi32(
|
||||
(second_null_map[j + 7]) ? full : none,
|
||||
(second_null_map[j + 6]) ? full : none,
|
||||
(second_null_map[j + 5]) ? full : none,
|
||||
(second_null_map[j + 4]) ? full : none,
|
||||
(second_null_map[j + 3]) ? full : none,
|
||||
(second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
// has_mask is refreshed in the increment expression: it becomes 1 once every bit of bitmask is set.
for (; i < first.size - 7 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 8)
|
||||
{
|
||||
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(first.data + i));
|
||||
// Create a mask to avoid to compare null elements
|
||||
// set_m128i takes two arguments: (high segment, low segment) that are two __m128i convert from 8bits to 32bits to match with next operations
|
||||
// NOTE(review): a flag byte of 1 widens to the value 1, so the and-not below clears only the lowest bit of a lane - confirm this is intended.
const __m256i first_nm_mask = has_first_null_map?
|
||||
_mm256_set_m128i(
|
||||
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 4))),
|
||||
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
|
||||
: zeros;
|
||||
// Compare second_data with first_data and with its 7 rotations by whole 32-bit lanes,
// and OR all results into bitmask.
bitmask =
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
first_nm_mask,
|
||||
_mm256_cmpeq_epi32(second_data, first_data)),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(6,5,4,3,2,1,0,7)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(6,5,4,3,2,1,0,7))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(5,4,3,2,1,0,7,6)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(5,4,3,2,1,0,7,6)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(4,3,2,1,0,7,6,5)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(4,3,2,1,0,7,6,5)))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(3,2,1,0,7,6,5,4)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(3,2,1,0,7,6,5,4)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(2,1,0,7,6,5,4,3)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(2,1,0,7,6,5,4,3))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(1,0,7,6,5,4,3,2)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(1,0,7,6,5,4,3,2)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(0,7,6,5,4,3,2,1)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(0,7,6,5,4,3,2,1))))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
// Tail of 'first' (fewer than 8 elements left): broadcast each non-null element and compare.
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m256i v_i = _mm256_set1_epi32(first.data[i]);
|
||||
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi32(second_data, v_i));
|
||||
has_mask = _mm256_testc_si256(bitmask, ones);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// has_mask == 0 here means the last vector chunk of 'second' was not fully matched.
if (!has_mask && second.size > 7)
|
||||
return false;
|
||||
|
||||
// Remaining elements of 'second' (from position j) are checked one by one.
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// AVX2 Int16, UInt16 specialization.
// Returns true when every element of 'second' is matched by an element of 'first'.
// Either null map may be nullptr, meaning that side has no null flags.
// 'second' is consumed 16 elements at a time; whatever is left is handed to hasAllIntegralLoopRemainder.
// NOTE(review): the null-map loads below use _mm_loadu_si128 (16 bytes) although the widening
// conversion consumes only the lowest 8 bytes - presumably this relies on the buffers being padded; confirm.
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int16> || std::is_same_v<IntType, UInt16>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
// Nothing to look for.
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
// 'second' has a null flag set but 'first' has none: report "not contained".
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
// has_mask is 1 while every chunk of 'second' processed so far was fully matched.
int has_mask = 1;
|
||||
static constexpr int16_t full = -1, none = 0;
|
||||
const __m256i ones = _mm256_set1_epi16(full);
|
||||
const __m256i zeros = _mm256_setzero_si256();
|
||||
// The vectorized path is only taken when both slices hold at least one full 16-element chunk.
if (second.size > 15 && first.size > 15)
|
||||
{
|
||||
for (; j < second.size - 15 && has_mask; j += 16)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
|
||||
// Lanes of 'second' flagged as null start out fully set, i.e. already counted as matched.
__m256i bitmask = has_second_null_map ?
|
||||
_mm256_set_epi16(
|
||||
(second_null_map[j + 15]) ? full : none, (second_null_map[j + 14]) ? full : none,
|
||||
(second_null_map[j + 13]) ? full : none, (second_null_map[j + 12]) ? full : none,
|
||||
(second_null_map[j + 11]) ? full : none, (second_null_map[j + 10]) ? full : none,
|
||||
(second_null_map[j + 9]) ? full : none, (second_null_map[j + 8])? full : none,
|
||||
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6])? full : none,
|
||||
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4])? full : none,
|
||||
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2])? full : none,
|
||||
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
// has_mask is refreshed in the increment expression: it becomes 1 once every bit of bitmask is set.
for (; i < first.size - 15 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 16)
|
||||
{
|
||||
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(first.data + i));
|
||||
// Null-map bytes of 'first' widened to 16-bit lanes (sign extension).
// NOTE(review): a flag byte of 1 widens to the value 1, so the and-not below clears only the lowest bit of a lane - confirm this is intended.
const __m256i first_nm_mask = has_first_null_map?
|
||||
_mm256_set_m128i(
|
||||
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 8))),
|
||||
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
|
||||
: zeros;
|
||||
|
||||
// 16 comparisons are OR-ed into bitmask. The first 8 use first_data as loaded plus 7 byte shuffles;
// the last 8 repeat that after swapping the two 128-bit halves with _mm256_permute2x128_si256.
// _mm256_shuffle_epi8 selects bytes within each 128-bit half only (see the Intel intrinsics reference).
bitmask =
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
first_nm_mask,
|
||||
_mm256_cmpeq_epi16(second_data, first_data)),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26)))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18))))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permute2x128_si256(first_nm_mask, first_nm_mask,1),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_permute2x128_si256(first_data, first_data, 1))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10)))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
|
||||
)
|
||||
),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
// Tail of 'first' (fewer than 16 elements left): broadcast each non-null element and compare.
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m256i v_i = _mm256_set1_epi16(first.data[i]);
|
||||
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi16(second_data, v_i));
|
||||
has_mask = _mm256_testc_si256(bitmask, ones);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// has_mask == 0 here means the last vector chunk of 'second' was not fully matched.
if (!has_mask && second.size > 15)
|
||||
return false;
|
||||
|
||||
// Remaining elements of 'second' (from position j) are checked one by one.
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
DECLARE_SSE42_SPECIFIC_CODE (
|
||||
|
||||
// SSE4.2 Int64, UInt64 specialization.
// Returns true when every element of 'second' is matched by an element of 'first'.
// Either null map may be nullptr, meaning that side has no null flags.
// 'second' is consumed 2 elements at a time; whatever is left is handed to hasAllIntegralLoopRemainder.
// NOTE(review): the null-map load below uses _mm_loadu_si128 (16 bytes) although the widening
// conversion consumes only the lowest 2 bytes - presumably this relies on the buffers being padded; confirm.
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int64> || std::is_same_v<IntType, UInt64>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
// Nothing to look for.
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
// 'second' has a null flag set but 'first' has none: report "not contained".
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
// has_mask is 1 while every chunk of 'second' processed so far was fully matched.
int has_mask = 1;
|
||||
static constexpr Int64 full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
// The vectorized path is only taken when both slices hold at least one full 2-element chunk.
if (second.size > 1 && first.size > 1)
|
||||
{
|
||||
for (; j < second.size - 1 && has_mask; j += 2)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
// Lanes of 'second' flagged as null start out fully set, i.e. already counted as matched.
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi64x(
|
||||
(second_null_map[j + 1]) ? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
|
||||
// has_mask is refreshed in the increment expression: it becomes 1 once every bit of bitmask is set.
for (; i < first.size - 1 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 2)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
// Null-map bytes of 'first' widened to 64-bit lanes (sign extension).
// NOTE(review): a flag byte of 1 widens to the value 1, so the and-not below clears only the lowest bit of a lane - confirm this is intended.
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
|
||||
: zeros;
|
||||
|
||||
// Compare second_data with first_data as loaded and with its two 64-bit halves swapped.
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi64(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(1,0,3,2)),
|
||||
_mm_cmpeq_epi64(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(1,0,3,2))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
// Tail of 'first' (one element left): broadcast it, unless it is null, and compare.
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m128i v_i = _mm_set1_epi64x(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi64(second_data, v_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// has_mask == 0 here means the last vector chunk of 'second' was not fully matched.
if (!has_mask && second.size > 1)
|
||||
return false;
|
||||
|
||||
// Remaining elements of 'second' (from position j) are checked one by one.
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// SSE4.2 Int32, UInt32 specialization.
// Returns true when every element of 'second' is matched by an element of 'first'.
// Either null map may be nullptr, meaning that side has no null flags.
// 'second' is consumed 4 elements at a time; whatever is left is handed to hasAllIntegralLoopRemainder.
// NOTE(review): the null-map load below uses _mm_loadu_si128 (16 bytes) although the widening
// conversion consumes only the lowest 4 bytes - presumably this relies on the buffers being padded; confirm.
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
// Nothing to look for.
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
// 'second' has a null flag set but 'first' has none: report "not contained".
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
// has_mask is 1 while every chunk of 'second' processed so far was fully matched.
int has_mask = 1;
|
||||
static constexpr int full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
// The vectorized path is only taken when both slices hold at least one full 4-element chunk.
if (second.size > 3 && first.size > 3)
|
||||
{
|
||||
for (; j < second.size - 3 && has_mask; j += 4)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
// Lanes of 'second' flagged as null start out fully set, i.e. already counted as matched.
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi32(
|
||||
(second_null_map[j + 3]) ? full : none,
|
||||
(second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
// has_mask is refreshed in the increment expression: it becomes 1 once every bit of bitmask is set.
for (; i < first.size - 3 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 4)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
// Null-map bytes of 'first' widened to 32-bit lanes (sign extension).
// NOTE(review): a flag byte of 1 widens to the value 1, so the and-not below clears only the lowest bit of a lane - confirm this is intended.
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
|
||||
: zeros;
|
||||
|
||||
// Compare second_data with first_data and with its 3 rotations by whole 32-bit lanes.
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi32(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(2,1,0,3)),
|
||||
_mm_cmpeq_epi32(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(2,1,0,3))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(1,0,3,2)),
|
||||
_mm_cmpeq_epi32(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(1,0,3,2)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(0,3,2,1)),
|
||||
_mm_cmpeq_epi32(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(0,3,2,1)))))
|
||||
),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
// Tail of 'first' (fewer than 4 elements left): broadcast each non-null element and compare.
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
__m128i r_i = _mm_set1_epi32(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi32(second_data, r_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// has_mask == 0 here means the last vector chunk of 'second' was not fully matched.
if (!has_mask && second.size > 3)
|
||||
return false;
|
||||
|
||||
// Remaining elements of 'second' (from position j) are checked one by one.
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// SSE4.2 Int16, UInt16 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int16> || std::is_same_v<IntType, UInt16>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int16_t full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
if (second.size > 6 && first.size > 6)
|
||||
{
|
||||
for (; j < second.size - 7 && has_mask; j += 8)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi16(
|
||||
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6]) ? full : none,
|
||||
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4]) ? full : none,
|
||||
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full: none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size-7 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 8)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
|
||||
: zeros;
|
||||
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi16(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)))))
|
||||
),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
|
||||
),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
__m128i v_i = _mm_set1_epi16(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi16(second_data, v_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 6)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// Int8/UInt8 version is faster with SSE than with AVX2
|
||||
// SSE2 Int8, UInt8 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int8> || std::is_same_v<IntType, UInt8>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt8(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int8_t full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
|
||||
if (second.size > 15 && first.size > 15)
|
||||
{
|
||||
for (; j < second.size - 15 && has_mask; j += 16)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi8(
|
||||
(second_null_map[j + 15]) ? full : none, (second_null_map[j + 14]) ? full : none,
|
||||
(second_null_map[j + 13]) ? full : none, (second_null_map[j + 12]) ? full : none,
|
||||
(second_null_map[j + 11]) ? full : none, (second_null_map[j + 10]) ? full : none,
|
||||
(second_null_map[j + 9]) ? full : none, (second_null_map[j + 8]) ? full : none,
|
||||
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6]) ? full : none,
|
||||
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4]) ? full : none,
|
||||
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size - 15 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 16)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))
|
||||
: zeros;
|
||||
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi8(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13)))))
|
||||
),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9))))))),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5)))))),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)))))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m128i v_i = _mm_set1_epi8(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi8(second_data, v_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 15)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
template <
|
||||
ArraySearchType search_type,
|
||||
typename FirstSliceType,
|
||||
typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
bool sliceHasImplAnyAllGenericImpl(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
const bool has_second_null = hasNull(second_null_map, second.size);
|
||||
if (has_second_null)
|
||||
{
|
||||
const bool has_first_null = hasNull(first_null_map, first.size);
|
||||
|
||||
if (has_first_null && search_type == ArraySearchType::Any)
|
||||
return true;
|
||||
|
||||
if (!has_first_null && search_type == ArraySearchType::All)
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < second.size; ++i)
|
||||
{
|
||||
if (has_second_null_map && second_null_map[i])
|
||||
continue;
|
||||
|
||||
bool has = false;
|
||||
|
||||
for (size_t j = 0; j < first.size && !has; ++j)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[j])
|
||||
continue;
|
||||
|
||||
if (isEqual(first, second, j, i))
|
||||
{
|
||||
has = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has && search_type == ArraySearchType::Any)
|
||||
return true;
|
||||
|
||||
if (!has && search_type == ArraySearchType::All)
|
||||
return false;
|
||||
}
|
||||
|
||||
return search_type == ArraySearchType::All;
|
||||
}
|
||||
|
||||
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
|
||||
template <
|
||||
ArraySearchType search_type,
|
||||
typename FirstSliceType,
|
||||
typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
#if USE_MULTITARGET_CODE
|
||||
if constexpr (search_type == ArraySearchType::All && std::is_same_v<FirstSliceType, SecondSliceType>)
|
||||
{
|
||||
|
||||
#if defined(__AVX2__)
|
||||
if (isArchSupported(TargetArch::AVX2))
|
||||
{
|
||||
if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int16>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt16>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt16(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int32>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt32>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt32(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int64>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt64>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (isArchSupported(TargetArch::SSE42))
|
||||
{
|
||||
if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int8>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt8>>)
|
||||
{
|
||||
return TargetSpecific::SSE42::sliceHasImplAnyAllImplInt8(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int16>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt16>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt16(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int32>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt32>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt32(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int64>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt64>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return sliceHasImplAnyAllGenericImpl<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
@ -6,8 +6,8 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/range.h>
|
||||
#include <cmath>
|
||||
|
||||
|
139
src/Functions/tests/gtest_has_all.cpp
Normal file
139
src/Functions/tests/gtest_has_all.cpp
Normal file
@ -0,0 +1,139 @@
|
||||
#include <random>
|
||||
#include <gtest/gtest.h>
|
||||
#include <Functions/GatherUtils/Algorithms.h>
|
||||
|
||||
using namespace DB::GatherUtils;
|
||||
|
||||
|
||||
/// Returns a pair (distribution, generator): a uniform integer distribution over [min, max] and
/// a Mersenne Twister engine seeded from std::random_device.
auto uni_int_dist(int min, int max)
{
    std::random_device seed_source;
    return std::make_pair(std::uniform_int_distribution<>(min, max), std::mt19937(seed_source()));
}
|
||||
|
||||
template<class T>
|
||||
void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_elements, size_t array_size, bool all_elements_present)
|
||||
{
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
{
|
||||
array_elements[i] = i;
|
||||
}
|
||||
auto [dist, gen] = uni_int_dist(0, array_size - 1);
|
||||
for (size_t i = 0; i < nb_elements_to_have; ++i)
|
||||
{
|
||||
elements_to_have[i] = array_elements[dist(gen)];
|
||||
}
|
||||
if (!all_elements_present)
|
||||
{
|
||||
/// make one element to be searched for missing from the target array
|
||||
elements_to_have[nb_elements_to_have - 1] = array_size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears `null_map` and then marks up to `nb_null_elements` random positions as NULL.
/// The last position is never marked. Random positions may repeat, so fewer than `nb_null_elements`
/// distinct entries can end up set.
void nullMapInit(UInt8 * null_map, size_t null_map_size, size_t nb_null_elements)
{
    for (size_t i = 0; i < null_map_size; ++i)
        null_map[i] = 0;

    /// With fewer than two entries there is no position other than the last one to mark;
    /// returning here also keeps `null_map_size - 2` and `null_map_size - 1` from wrapping around.
    if (null_map_size < 2)
        return;

    /// -2 to keep the last element of the array non-null
    auto [dist, gen] = uni_int_dist(0, static_cast<int>(null_map_size - 2));
    for (size_t i = 0; i < null_map_size - 1 && i < nb_null_elements; ++i)
        null_map[dist(gen)] = 1;
}
|
||||
|
||||
template<class T>
|
||||
bool testHasAll(size_t nb_elements_to_have, size_t array_size, bool with_null_maps, bool all_elements_present)
|
||||
{
|
||||
auto array_elements = std::make_unique<T[]>(array_size);
|
||||
auto elements_to_have = std::make_unique<T[]>(nb_elements_to_have);
|
||||
|
||||
std::unique_ptr<UInt8[]> first_nm = nullptr, second_nm = nullptr;
|
||||
if (with_null_maps)
|
||||
{
|
||||
first_nm = std::make_unique<UInt8[]>(array_size);
|
||||
second_nm = std::make_unique<UInt8[]>(nb_elements_to_have);
|
||||
/// add a null to elements to have, but not to the target array, making the answer negative
|
||||
nullMapInit(first_nm.get(), array_size, 0);
|
||||
nullMapInit(second_nm.get(), nb_elements_to_have, 1);
|
||||
}
|
||||
|
||||
arrayInit(elements_to_have.get(), nb_elements_to_have, array_elements.get(), array_size, all_elements_present);
|
||||
|
||||
NumericArraySlice<T> first = {array_elements.get(), array_size};
|
||||
NumericArraySlice<T> second = {elements_to_have.get(), nb_elements_to_have};
|
||||
|
||||
/// check whether all elements of the second array are also elements of the first array, overloaded for various combinations of types.
|
||||
return sliceHasImplAnyAll<ArraySearchType::All, NumericArraySlice<T>, NumericArraySlice<T>, sliceEqualElements<T,T> >(
|
||||
first, second, first_nm.get(), second_nm.get());
|
||||
}
|
||||
|
||||
/// int elements, no null maps: a small and a large input, each with all needles present and with one missing.
TEST(HasAll, integer)
{
    const bool small_present = testHasAll<int>(4, 100, false, true);
    const bool small_missing = testHasAll<int>(4, 100, false, false);
    const bool large_present = testHasAll<int>(100, 4096, false, true);
    const bool large_missing = testHasAll<int>(100, 4096, false, false);

    ASSERT_TRUE(small_present);
    ASSERT_FALSE(small_missing);
    ASSERT_TRUE(large_present);
    ASSERT_FALSE(large_missing);
}
|
||||
|
||||
|
||||
/// int64_t elements, no null maps: a small and a large input, each with all needles present and with one missing.
TEST(HasAll, int64)
{
    const bool small_present = testHasAll<int64_t>(2, 100, false, true);
    const bool small_missing = testHasAll<int64_t>(2, 100, false, false);
    const bool large_present = testHasAll<int64_t>(100, 4096, false, true);
    const bool large_missing = testHasAll<int64_t>(100, 4096, false, false);

    ASSERT_TRUE(small_present);
    ASSERT_FALSE(small_missing);
    ASSERT_TRUE(large_present);
    ASSERT_FALSE(large_missing);
}
|
||||
|
||||
/// int16_t elements, no null maps: a small and a large input, each with all needles present and with one missing.
TEST(HasAll, int16)
{
    const bool small_present = testHasAll<int16_t>(2, 100, false, true);
    const bool small_missing = testHasAll<int16_t>(2, 100, false, false);
    const bool large_present = testHasAll<int16_t>(100, 4096, false, true);
    const bool large_missing = testHasAll<int16_t>(100, 4096, false, false);

    ASSERT_TRUE(small_present);
    ASSERT_FALSE(small_missing);
    ASSERT_TRUE(large_present);
    ASSERT_FALSE(large_missing);
}
|
||||
|
||||
/// int8_t elements, no null maps; the sizes are kept small enough for the generated values to fit into int8_t.
TEST(HasAll, int8)
{
    const bool small_present = testHasAll<int8_t>(2, 100, false, true);
    const bool small_missing = testHasAll<int8_t>(2, 100, false, false);
    const bool large_present = testHasAll<int8_t>(50, 125, false, true);
    const bool large_missing = testHasAll<int8_t>(50, 125, false, false);

    ASSERT_TRUE(small_present);
    ASSERT_FALSE(small_missing);
    ASSERT_TRUE(large_present);
    ASSERT_FALSE(large_missing);
}
|
||||
|
||||
/// With null maps enabled the needles contain a NULL while the target array has none,
/// so the search has to fail for every element width even though all values are present.
TEST(HasAllSingleNullElement, all)
{
    const bool result_int = testHasAll<int>(4, 100, true, true);
    const bool result_int64 = testHasAll<int64_t>(4, 100, true, true);
    const bool result_int16 = testHasAll<int16_t>(4, 100, true, true);
    const bool result_int8 = testHasAll<int8_t>(4, 100, true, true);

    ASSERT_FALSE(result_int);
    ASSERT_FALSE(result_int64);
    ASSERT_FALSE(result_int16);
    ASSERT_FALSE(result_int8);
}
|
@ -39,10 +39,6 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~ReadBufferFromFileDescriptor() override
|
||||
{
|
||||
}
|
||||
|
||||
int getFD() const
|
||||
{
|
||||
return fd;
|
||||
@ -84,9 +80,6 @@ public:
|
||||
{
|
||||
use_pread = true;
|
||||
}
|
||||
virtual ~ReadBufferFromFileDescriptorPRead() override
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -240,7 +240,7 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
|
||||
|
||||
SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
|
||||
{
|
||||
return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
|
||||
return Range{ .left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt };
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
|
||||
|
@ -33,8 +33,11 @@ private:
|
||||
String key;
|
||||
UInt64 max_single_read_retries;
|
||||
|
||||
off_t offset = 0;
|
||||
off_t read_until_position = 0;
|
||||
/// These variables are atomic because they can be used for `logging only`
|
||||
/// (where it is not important to get consistent result)
|
||||
/// from separate thread other than the one which uses the buffer for s3 reading.
|
||||
std::atomic<off_t> offset = 0;
|
||||
std::atomic<off_t> read_until_position = 0;
|
||||
|
||||
Aws::S3::Model::GetObjectResult read_result;
|
||||
std::unique_ptr<ReadBuffer> impl;
|
||||
|
@ -1527,6 +1527,21 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
|
||||
return res;
|
||||
}
|
||||
|
||||
ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameSet & sort_columns) const
{
    /// Every sorting column has to be found in the index; the nodes found are the split points.
    std::unordered_set<const Node *> split_nodes;
    for (const auto & sort_column : sort_columns)
    {
        const auto * node = tryFindInIndex(sort_column);
        if (!node)
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Sorting column {} wasn't found in the ActionsDAG's index. DAG:\n{}", sort_column, dumpDAG());

        split_nodes.insert(node);
    }

    auto split_result = split(split_nodes);
    /// The second part inherits the projection flag of this DAG.
    split_result.second->project_input = project_input;
    return split_result;
}
|
||||
|
||||
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
|
||||
{
|
||||
const auto * node = tryFindInIndex(column_name);
|
||||
|
@ -274,6 +274,10 @@ public:
|
||||
/// Index of initial actions must contain column_name.
|
||||
SplitResult splitActionsForFilter(const std::string & column_name) const;
|
||||
|
||||
/// Splits actions into two parts. The first part contains all the calculations required to calculate sort_columns.
|
||||
/// The second contains the rest.
|
||||
SplitResult splitActionsBySortingDescription(const NameSet & sort_columns) const;
|
||||
|
||||
/// Create actions which may calculate part of filter using only available_inputs.
|
||||
/// If nothing may be calculated, returns nullptr.
|
||||
/// Otherwise, return actions which inputs are from available_inputs.
|
||||
|
@ -136,7 +136,7 @@ std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog
|
||||
|
||||
if (qualified_name->database.empty())
|
||||
{
|
||||
/// Ether database name is not specified and we should use current one
|
||||
/// Either database name is not specified and we should use current one
|
||||
/// or it's an XML dictionary.
|
||||
bool is_xml_dictionary = has(name);
|
||||
if (is_xml_dictionary)
|
||||
|
@ -2249,10 +2249,6 @@ static bool windowDescriptionComparator(const WindowDescription * _left, const W
|
||||
return true;
|
||||
else if (left[i].column_name > right[i].column_name)
|
||||
return false;
|
||||
else if (left[i].column_number < right[i].column_number)
|
||||
return true;
|
||||
else if (left[i].column_number > right[i].column_number)
|
||||
return false;
|
||||
else if (left[i].direction < right[i].direction)
|
||||
return true;
|
||||
else if (left[i].direction > right[i].direction)
|
||||
|
@ -1025,7 +1025,7 @@ std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIf
|
||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||
{
|
||||
if (header.has(sort_columns[i]))
|
||||
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
||||
sort_description.emplace_back(sort_columns[i], 1, 1);
|
||||
else
|
||||
return {};
|
||||
}
|
||||
|
@ -430,8 +430,8 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
|
||||
SortDescription sort_description;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
block_to_sort.insert({ ordered_set[i], nullptr, "_" + toString(i) });
|
||||
sort_description.emplace_back(i, 1, 1);
|
||||
block_to_sort.insert({ordered_set[i], nullptr, ordered_set[i]->getName()});
|
||||
sort_description.emplace_back(ordered_set[i]->getName(), 1, 1);
|
||||
}
|
||||
|
||||
sortBlock(block_to_sort, sort_description);
|
||||
|
@ -379,7 +379,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
||||
for (const auto & name_and_type : log_element_names_and_types)
|
||||
log_element_columns.emplace_back(name_and_type.type, name_and_type.name);
|
||||
|
||||
Block block(log_element_columns);
|
||||
Block block(std::move(log_element_columns));
|
||||
|
||||
MutableColumns columns = block.mutateColumns();
|
||||
for (const auto & elem : to_flush)
|
||||
|
@ -512,14 +512,6 @@ TableJoin::createConvertingActions(const ColumnsWithTypeAndName & left_sample_co
|
||||
template <typename LeftNamesAndTypes, typename RightNamesAndTypes>
|
||||
void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right)
|
||||
{
|
||||
if (strictness() == ASTTableJoin::Strictness::Asof)
|
||||
{
|
||||
if (clauses.size() != 1)
|
||||
throw DB::Exception("ASOF join over multiple keys is not supported", ErrorCodes::NOT_IMPLEMENTED);
|
||||
if (right.back().type->isNullable())
|
||||
throw DB::Exception("ASOF join over right table Nullable column is not implemented", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
if (!left_type_map.empty() || !right_type_map.empty())
|
||||
return;
|
||||
|
||||
@ -531,6 +523,15 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig
|
||||
for (const auto & col : right)
|
||||
right_types[renamedRightColumnName(col.name)] = col.type;
|
||||
|
||||
if (strictness() == ASTTableJoin::Strictness::Asof)
|
||||
{
|
||||
if (clauses.size() != 1)
|
||||
throw DB::Exception("ASOF join over multiple keys is not supported", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
auto asof_key_type = right_types.find(clauses.back().key_names_right.back());
|
||||
if (asof_key_type != right_types.end() && asof_key_type->second->isNullable())
|
||||
throw DB::Exception("ASOF join over right table Nullable column is not implemented", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
forAllKeys(clauses, [&](const auto & left_key_name, const auto & right_key_name)
|
||||
{
|
||||
|
@ -98,9 +98,7 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c
|
||||
{
|
||||
const auto & sort_column_description = description[i];
|
||||
|
||||
const IColumn * column = !sort_column_description.column_name.empty()
|
||||
? block.getByName(sort_column_description.column_name).column.get()
|
||||
: block.safeGetByPosition(sort_column_description.column_number).column.get();
|
||||
const IColumn * column = block.getByName(sort_column_description.column_name).column.get();
|
||||
|
||||
if (isCollationRequired(sort_column_description))
|
||||
{
|
||||
|
@ -81,9 +81,17 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s
|
||||
|
||||
if (infile)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM INFILE " << (settings.hilite ? hilite_none : "") << infile->as<ASTLiteral &>().value.safeGet<std::string>();
|
||||
settings.ostr
|
||||
<< (settings.hilite ? hilite_keyword : "")
|
||||
<< " FROM INFILE "
|
||||
<< (settings.hilite ? hilite_none : "")
|
||||
<< quoteString(infile->as<ASTLiteral &>().value.safeGet<std::string>());
|
||||
if (compression)
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " COMPRESSION " << (settings.hilite ? hilite_none : "") << compression->as<ASTLiteral &>().value.safeGet<std::string>();
|
||||
settings.ostr
|
||||
<< (settings.hilite ? hilite_keyword : "")
|
||||
<< " COMPRESSION "
|
||||
<< (settings.hilite ? hilite_none : "")
|
||||
<< quoteString(compression->as<ASTLiteral &>().value.safeGet<std::string>());
|
||||
}
|
||||
|
||||
if (select)
|
||||
|
@ -15,6 +15,7 @@ public:
|
||||
size_t fields_count = 0;
|
||||
|
||||
String id;
|
||||
bool all = false;
|
||||
|
||||
String getID(char) const override;
|
||||
ASTPtr clone() const override;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Parsers/ASTPartition.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB
|
||||
@ -13,6 +14,7 @@ namespace DB
|
||||
bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ParserKeyword s_id("ID");
|
||||
ParserKeyword s_all("ALL");
|
||||
ParserStringLiteral parser_string_literal;
|
||||
ParserExpression parser_expr;
|
||||
|
||||
@ -28,6 +30,14 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
|
||||
partition->id = partition_id->as<ASTLiteral &>().value.get<String>();
|
||||
}
|
||||
else if (s_all.ignore(pos, expected))
|
||||
{
|
||||
ASTPtr value = makeASTFunction("tuple");
|
||||
partition->value = value;
|
||||
partition->children.push_back(value);
|
||||
partition->fields_count = 0;
|
||||
partition->all = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASTPtr value;
|
||||
|
@ -38,12 +38,7 @@ LimitTransform::LimitTransform(
|
||||
}
|
||||
|
||||
for (const auto & desc : description)
|
||||
{
|
||||
if (!desc.column_name.empty())
|
||||
sort_column_positions.push_back(header_.getPositionByName(desc.column_name));
|
||||
else
|
||||
sort_column_positions.push_back(desc.column_number);
|
||||
}
|
||||
sort_column_positions.push_back(header_.getPositionByName(desc.column_name));
|
||||
}
|
||||
|
||||
Chunk LimitTransform::makeChunkWithPreviousRow(const Chunk & chunk, UInt64 row) const
|
||||
|
@ -104,7 +104,7 @@ static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
/// Included into PK?
|
||||
auto it = description.begin();
|
||||
for (; it != description.end(); ++it)
|
||||
if (it->column_name == column.name || (it->column_name.empty() && it->column_number == i))
|
||||
if (it->column_name == column.name)
|
||||
break;
|
||||
|
||||
if (it != description.end())
|
||||
@ -290,11 +290,10 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::initAggregateDescription
|
||||
|
||||
|
||||
AggregatingSortedAlgorithm::AggregatingSortedAlgorithm(
|
||||
const Block & header, size_t num_inputs,
|
||||
SortDescription description_, size_t max_block_size)
|
||||
: IMergingAlgorithmWithDelayedChunk(num_inputs, description_)
|
||||
, columns_definition(defineColumns(header, description_))
|
||||
, merged_data(getMergedColumns(header, columns_definition), max_block_size, columns_definition)
|
||||
const Block & header_, size_t num_inputs, SortDescription description_, size_t max_block_size)
|
||||
: IMergingAlgorithmWithDelayedChunk(header_, num_inputs, description_)
|
||||
, columns_definition(defineColumns(header_, description_))
|
||||
, merged_data(getMergedColumns(header_, columns_definition), max_block_size, columns_definition)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
CollapsingSortedAlgorithm::CollapsingSortedAlgorithm(
|
||||
const Block & header,
|
||||
const Block & header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
const String & sign_column,
|
||||
@ -30,9 +30,9 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm(
|
||||
Poco::Logger * log_,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
bool use_average_block_sizes)
|
||||
: IMergingAlgorithmWithSharedChunks(num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
|
||||
, merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
, sign_column_number(header.getPositionByName(sign_column))
|
||||
: IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
|
||||
, merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
, sign_column_number(header_.getPositionByName(sign_column))
|
||||
, only_positive_sign(only_positive_sign_)
|
||||
, log(log_)
|
||||
{
|
||||
|
@ -14,11 +14,8 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
FinishAggregatingInOrderAlgorithm::State::State(
|
||||
const Chunk & chunk, const SortDescription & desc, Int64 total_bytes_)
|
||||
: all_columns(chunk.getColumns())
|
||||
, num_rows(chunk.getNumRows())
|
||||
, total_bytes(total_bytes_)
|
||||
FinishAggregatingInOrderAlgorithm::State::State(const Chunk & chunk, const SortDescriptionWithPositions & desc, Int64 total_bytes_)
|
||||
: all_columns(chunk.getColumns()), num_rows(chunk.getNumRows()), total_bytes(total_bytes_)
|
||||
{
|
||||
if (!chunk)
|
||||
return;
|
||||
@ -32,25 +29,13 @@ FinishAggregatingInOrderAlgorithm::FinishAggregatingInOrderAlgorithm(
|
||||
const Block & header_,
|
||||
size_t num_inputs_,
|
||||
AggregatingTransformParamsPtr params_,
|
||||
SortDescription description_,
|
||||
const SortDescription & description_,
|
||||
size_t max_block_size_,
|
||||
size_t max_block_bytes_)
|
||||
: header(header_)
|
||||
, num_inputs(num_inputs_)
|
||||
, params(params_)
|
||||
, description(std::move(description_))
|
||||
, max_block_size(max_block_size_)
|
||||
, max_block_bytes(max_block_bytes_)
|
||||
: header(header_), num_inputs(num_inputs_), params(params_), max_block_size(max_block_size_), max_block_bytes(max_block_bytes_)
|
||||
{
|
||||
/// Replace column names in description to positions.
|
||||
for (auto & column_description : description)
|
||||
{
|
||||
if (!column_description.column_name.empty())
|
||||
{
|
||||
column_description.column_number = header_.getPositionByName(column_description.column_name);
|
||||
column_description.column_name.clear();
|
||||
}
|
||||
}
|
||||
for (const auto & column_description : description_)
|
||||
description.emplace_back(column_description, header_.getPositionByName(column_description.column_name));
|
||||
}
|
||||
|
||||
void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs)
|
||||
|
@ -41,7 +41,7 @@ public:
|
||||
const Block & header_,
|
||||
size_t num_inputs_,
|
||||
AggregatingTransformParamsPtr params_,
|
||||
SortDescription description_,
|
||||
const SortDescription & description_,
|
||||
size_t max_block_size_,
|
||||
size_t max_block_bytes_);
|
||||
|
||||
@ -69,7 +69,7 @@ private:
|
||||
/// Number of bytes in all columns + number of bytes in arena, related to current chunk.
|
||||
size_t total_bytes = 0;
|
||||
|
||||
State(const Chunk & chunk, const SortDescription & description, Int64 total_bytes_);
|
||||
State(const Chunk & chunk, const SortDescriptionWithPositions & description, Int64 total_bytes_);
|
||||
State() = default;
|
||||
|
||||
bool isValid() const { return current_row < num_rows; }
|
||||
@ -78,7 +78,7 @@ private:
|
||||
Block header;
|
||||
size_t num_inputs;
|
||||
AggregatingTransformParamsPtr params;
|
||||
SortDescription description;
|
||||
SortDescriptionWithPositions description;
|
||||
size_t max_block_size;
|
||||
size_t max_block_bytes;
|
||||
|
||||
|
@ -30,12 +30,16 @@ static GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
}
|
||||
|
||||
GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm(
|
||||
const Block & header, size_t num_inputs,
|
||||
SortDescription description_, size_t max_block_size,
|
||||
Graphite::Params params_, time_t time_of_merge_)
|
||||
: IMergingAlgorithmWithSharedChunks(num_inputs, std::move(description_), nullptr, max_row_refs)
|
||||
, merged_data(header.cloneEmptyColumns(), false, max_block_size)
|
||||
, params(std::move(params_)), time_of_merge(time_of_merge_)
|
||||
const Block & header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
size_t max_block_size,
|
||||
Graphite::Params params_,
|
||||
time_t time_of_merge_)
|
||||
: IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs)
|
||||
, merged_data(header_.cloneEmptyColumns(), false, max_block_size)
|
||||
, params(std::move(params_))
|
||||
, time_of_merge(time_of_merge_)
|
||||
{
|
||||
size_t max_size_of_aggregate_state = 0;
|
||||
size_t max_alignment_of_aggregate_state = 1;
|
||||
@ -50,7 +54,7 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm(
|
||||
}
|
||||
|
||||
merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state);
|
||||
columns_definition = defineColumns(header, params);
|
||||
columns_definition = defineColumns(header_, params);
|
||||
}
|
||||
|
||||
UInt32 GraphiteRollupSortedAlgorithm::selectPrecision(const Graphite::Retentions & retentions, time_t time) const
|
||||
|
@ -4,12 +4,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
IMergingAlgorithmWithDelayedChunk::IMergingAlgorithmWithDelayedChunk(
|
||||
size_t num_inputs,
|
||||
SortDescription description_)
|
||||
: description(std::move(description_))
|
||||
, current_inputs(num_inputs)
|
||||
, cursors(num_inputs)
|
||||
IMergingAlgorithmWithDelayedChunk::IMergingAlgorithmWithDelayedChunk(Block header_, size_t num_inputs, SortDescription description_)
|
||||
: description(std::move(description_)), header(std::move(header_)), current_inputs(num_inputs), cursors(num_inputs)
|
||||
{
|
||||
}
|
||||
|
||||
@ -22,7 +18,8 @@ void IMergingAlgorithmWithDelayedChunk::initializeQueue(Inputs inputs)
|
||||
if (!current_inputs[source_num].chunk)
|
||||
continue;
|
||||
|
||||
cursors[source_num] = SortCursorImpl(current_inputs[source_num].chunk.getColumns(), description, source_num, current_inputs[source_num].permutation);
|
||||
cursors[source_num] = SortCursorImpl(
|
||||
header, current_inputs[source_num].chunk.getColumns(), description, source_num, current_inputs[source_num].permutation);
|
||||
}
|
||||
|
||||
queue = SortingHeap<SortCursor>(cursors);
|
||||
@ -37,7 +34,7 @@ void IMergingAlgorithmWithDelayedChunk::updateCursor(Input & input, size_t sourc
|
||||
last_chunk_sort_columns = std::move(cursors[source_num].sort_columns);
|
||||
|
||||
current_input.swap(input);
|
||||
cursors[source_num].reset(current_input.chunk.getColumns(), {}, current_input.permutation);
|
||||
cursors[source_num].reset(current_input.chunk.getColumns(), header, current_input.permutation);
|
||||
|
||||
queue.push(cursors[source_num]);
|
||||
}
|
||||
|
@ -10,9 +10,7 @@ namespace DB
|
||||
class IMergingAlgorithmWithDelayedChunk : public IMergingAlgorithm
|
||||
{
|
||||
public:
|
||||
IMergingAlgorithmWithDelayedChunk(
|
||||
size_t num_inputs,
|
||||
SortDescription description_);
|
||||
IMergingAlgorithmWithDelayedChunk(Block header_, size_t num_inputs, SortDescription description_);
|
||||
|
||||
protected:
|
||||
SortingHeap<SortCursor> queue;
|
||||
@ -28,6 +26,8 @@ protected:
|
||||
bool skipLastRowFor(size_t input_number) const { return current_inputs[input_number].skip_last_row; }
|
||||
|
||||
private:
|
||||
Block header;
|
||||
|
||||
/// Inputs currently being merged.
|
||||
Inputs current_inputs;
|
||||
SortCursorImpls cursors;
|
||||
|
@ -4,11 +4,9 @@ namespace DB
|
||||
{
|
||||
|
||||
IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks(
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
size_t max_row_refs)
|
||||
: description(std::move(description_))
|
||||
Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs)
|
||||
: header(std::move(header_))
|
||||
, description(std::move(description_))
|
||||
, chunk_allocator(num_inputs + max_row_refs)
|
||||
, cursors(num_inputs)
|
||||
, sources(num_inputs)
|
||||
@ -39,7 +37,7 @@ void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs)
|
||||
|
||||
source.skip_last_row = inputs[source_num].skip_last_row;
|
||||
source.chunk = chunk_allocator.alloc(inputs[source_num].chunk);
|
||||
cursors[source_num] = SortCursorImpl(source.chunk->getColumns(), description, source_num, inputs[source_num].permutation);
|
||||
cursors[source_num] = SortCursorImpl(header, source.chunk->getColumns(), description, source_num, inputs[source_num].permutation);
|
||||
|
||||
source.chunk->all_columns = cursors[source_num].all_columns;
|
||||
source.chunk->sort_columns = cursors[source_num].sort_columns;
|
||||
@ -55,7 +53,7 @@ void IMergingAlgorithmWithSharedChunks::consume(Input & input, size_t source_num
|
||||
auto & source = sources[source_num];
|
||||
source.skip_last_row = input.skip_last_row;
|
||||
source.chunk = chunk_allocator.alloc(input.chunk);
|
||||
cursors[source_num].reset(source.chunk->getColumns(), {}, input.permutation);
|
||||
cursors[source_num].reset(source.chunk->getColumns(), header, input.permutation);
|
||||
|
||||
source.chunk->all_columns = cursors[source_num].all_columns;
|
||||
source.chunk->sort_columns = cursors[source_num].sort_columns;
|
||||
|
@ -10,15 +10,13 @@ class IMergingAlgorithmWithSharedChunks : public IMergingAlgorithm
|
||||
{
|
||||
public:
|
||||
IMergingAlgorithmWithSharedChunks(
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
size_t max_row_refs);
|
||||
Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs);
|
||||
|
||||
void initialize(Inputs inputs) override;
|
||||
void consume(Input & input, size_t source_num) override;
|
||||
|
||||
private:
|
||||
Block header;
|
||||
SortDescription description;
|
||||
|
||||
/// Allocator must be destroyed after source_chunks.
|
||||
|
@ -11,30 +11,22 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
MergingSortedAlgorithm::MergingSortedAlgorithm(
|
||||
const Block & header,
|
||||
Block header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
size_t max_block_size,
|
||||
UInt64 limit_,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
bool use_average_block_sizes)
|
||||
: merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
: header(std::move(header_))
|
||||
, merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
, description(std::move(description_))
|
||||
, limit(limit_)
|
||||
, has_collation(std::any_of(description.begin(), description.end(), [](const auto & descr) { return descr.collator != nullptr; }))
|
||||
, out_row_sources_buf(out_row_sources_buf_)
|
||||
, current_inputs(num_inputs)
|
||||
, cursors(num_inputs)
|
||||
{
|
||||
/// Replace column names in description to positions.
|
||||
for (auto & column_description : description)
|
||||
{
|
||||
has_collation |= column_description.collator != nullptr;
|
||||
if (!column_description.column_name.empty())
|
||||
{
|
||||
column_description.column_number = header.getPositionByName(column_description.column_name);
|
||||
column_description.column_name.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MergingSortedAlgorithm::addInput()
|
||||
@ -65,7 +57,7 @@ void MergingSortedAlgorithm::initialize(Inputs inputs)
|
||||
continue;
|
||||
|
||||
prepareChunk(chunk);
|
||||
cursors[source_num] = SortCursorImpl(chunk.getColumns(), description, source_num);
|
||||
cursors[source_num] = SortCursorImpl(header, chunk.getColumns(), description, source_num);
|
||||
}
|
||||
|
||||
if (has_collation)
|
||||
@ -78,7 +70,7 @@ void MergingSortedAlgorithm::consume(Input & input, size_t source_num)
|
||||
{
|
||||
prepareChunk(input.chunk);
|
||||
current_inputs[source_num].swap(input);
|
||||
cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), {});
|
||||
cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), header);
|
||||
|
||||
if (has_collation)
|
||||
queue_with_collation.push(cursors[source_num]);
|
||||
|
@ -14,7 +14,7 @@ class MergingSortedAlgorithm final : public IMergingAlgorithm
|
||||
{
|
||||
public:
|
||||
MergingSortedAlgorithm(
|
||||
const Block & header,
|
||||
Block header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
size_t max_block_size,
|
||||
@ -31,6 +31,8 @@ public:
|
||||
const MergedData & getMergedData() const { return merged_data; }
|
||||
|
||||
private:
|
||||
Block header;
|
||||
|
||||
MergedData merged_data;
|
||||
|
||||
/// Settings
|
||||
|
@ -5,16 +5,18 @@ namespace DB
|
||||
{
|
||||
|
||||
ReplacingSortedAlgorithm::ReplacingSortedAlgorithm(
|
||||
const Block & header, size_t num_inputs,
|
||||
SortDescription description_, const String & version_column,
|
||||
size_t max_block_size,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
bool use_average_block_sizes)
|
||||
: IMergingAlgorithmWithSharedChunks(num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
|
||||
, merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
const Block & header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
const String & version_column,
|
||||
size_t max_block_size,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
bool use_average_block_sizes)
|
||||
: IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
|
||||
, merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
{
|
||||
if (!version_column.empty())
|
||||
version_column_number = header.getPositionByName(version_column);
|
||||
version_column_number = header_.getPositionByName(version_column);
|
||||
}
|
||||
|
||||
void ReplacingSortedAlgorithm::insertRow()
|
||||
|
@ -101,10 +101,10 @@ struct SummingSortedAlgorithm::AggregateDescription
|
||||
};
|
||||
|
||||
|
||||
static bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number)
|
||||
static bool isInPrimaryKey(const SortDescription & description, const std::string & name)
|
||||
{
|
||||
for (const auto & desc : description)
|
||||
if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number))
|
||||
if (desc.column_name == name)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@ -251,7 +251,7 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
}
|
||||
|
||||
/// Are they inside the primary key or partition key?
|
||||
if (isInPrimaryKey(description, column.name, i) || isInPartitionKey(column.name, partition_key_columns))
|
||||
if (isInPrimaryKey(description, column.name) || isInPartitionKey(column.name, partition_key_columns))
|
||||
{
|
||||
def.column_numbers_not_to_aggregate.push_back(i);
|
||||
continue;
|
||||
@ -307,7 +307,7 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
/// no elements of map could be in primary key
|
||||
auto column_num_it = map.second.begin();
|
||||
for (; column_num_it != map.second.end(); ++column_num_it)
|
||||
if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it))
|
||||
if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name))
|
||||
break;
|
||||
if (column_num_it != map.second.end())
|
||||
{
|
||||
@ -687,14 +687,15 @@ Chunk SummingSortedAlgorithm::SummingMergedData::pull()
|
||||
|
||||
|
||||
SummingSortedAlgorithm::SummingSortedAlgorithm(
|
||||
const Block & header, size_t num_inputs,
|
||||
const Block & header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
const Names & column_names_to_sum,
|
||||
const Names & partition_key_columns,
|
||||
size_t max_block_size)
|
||||
: IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_))
|
||||
, columns_definition(defineColumns(header, description, column_names_to_sum, partition_key_columns))
|
||||
, merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition)
|
||||
: IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_))
|
||||
, columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns))
|
||||
, merged_data(getMergedDataColumns(header_, columns_definition), max_block_size, columns_definition)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -8,19 +8,20 @@ namespace DB
|
||||
static const size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192;
|
||||
|
||||
VersionedCollapsingAlgorithm::VersionedCollapsingAlgorithm(
|
||||
const Block & header, size_t num_inputs,
|
||||
SortDescription description_, const String & sign_column_,
|
||||
const Block & header_,
|
||||
size_t num_inputs,
|
||||
SortDescription description_,
|
||||
const String & sign_column_,
|
||||
size_t max_block_size,
|
||||
WriteBuffer * out_row_sources_buf_,
|
||||
bool use_average_block_sizes)
|
||||
: IMergingAlgorithmWithSharedChunks(
|
||||
num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE)
|
||||
, merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
: IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE)
|
||||
, merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
|
||||
/// -1 for +1 in FixedSizeDequeWithGaps's internal buffer. 3 is a reasonable minimum size to collapse anything.
|
||||
, max_rows_in_queue(std::min(std::max<size_t>(3, max_block_size), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1)
|
||||
, current_keys(max_rows_in_queue)
|
||||
{
|
||||
sign_column_number = header.getPositionByName(sign_column_);
|
||||
sign_column_number = header_.getPositionByName(sign_column_);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE static void writeRowSourcePart(WriteBuffer & buffer, RowSourcePart row_source)
|
||||
|
@ -48,13 +48,13 @@ void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
|
||||
void FillingStep::describeActions(FormatSettings & settings) const
|
||||
{
|
||||
settings.out << String(settings.offset, ' ');
|
||||
dumpSortDescription(sort_description, input_streams.front().header, settings.out);
|
||||
dumpSortDescription(sort_description, settings.out);
|
||||
settings.out << '\n';
|
||||
}
|
||||
|
||||
void FillingStep::describeActions(JSONBuilder::JSONMap & map) const
|
||||
{
|
||||
map.add("Sort Description", explainSortDescription(sort_description, input_streams.front().header));
|
||||
map.add("Sort Description", explainSortDescription(sort_description));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -70,4 +70,9 @@ void ITransformingStep::describePipeline(FormatSettings & settings) const
|
||||
IQueryPlanStep::describePipeline(processors, settings);
|
||||
}
|
||||
|
||||
void ITransformingStep::appendExtraProcessors(const Processors & extra_processors)
|
||||
{
|
||||
processors.insert(processors.end(), extra_processors.begin(), extra_processors.end());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -57,6 +57,9 @@ public:
|
||||
|
||||
void describePipeline(FormatSettings & settings) const override;
|
||||
|
||||
/// Append extra processors for this step.
|
||||
void appendExtraProcessors(const Processors & extra_processors);
|
||||
|
||||
protected:
|
||||
/// Clear distinct_columns if res_header doesn't contain all of them.
|
||||
static void updateDistinctColumns(const Block & res_header, NameSet & distinct_columns);
|
||||
|
@ -44,16 +44,20 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
/// May split FilterStep and push down only part of it.
|
||||
size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Move ExpressionStep after SortingStep if possible.
|
||||
/// May split ExpressionStep and lift up only a part of it.
|
||||
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
inline const auto & getOptimizations()
|
||||
{
|
||||
static const std::array<Optimization, 5> optimizations =
|
||||
{{
|
||||
static const std::array<Optimization, 6> optimizations = {{
|
||||
{tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down},
|
||||
}};
|
||||
{tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
}};
|
||||
|
||||
return optimizations;
|
||||
}
|
||||
|
77
src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
Normal file
77
src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/SortingStep.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Returns the output stream of the only child of `node`.
/// Throws LOGICAL_ERROR if `node` does not have exactly one child.
const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
{
    if (node.children.size() != 1)
    {
        /// This helper is also called for a freshly created node whose step has not been assigned yet,
        /// so the step must not be dereferenced unconditionally while building the error message.
        const DB::String step_name = node.step ? node.step->getName() : DB::String("<step is not set>");
        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Node \"{}\" is expected to have only one child.", step_name);
    }

    return node.children.front()->step->getOutputStream();
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
// Query plan optimization for the pair "SortingStep (parent_node) on top of ExpressionStep (its only child)".
// The expression is split into the actions needed to compute the sort columns and the remaining ones;
// the former stay below the sorting, the latter are re-created as a new ExpressionStep above it.
// Returns 0 when the plan is left untouched and 3 when it was rewritten
// (three nodes take part: parent_node, child_node and the node added to `nodes`).
// NOTE(review): the exact meaning of the returned number is defined by the optimization framework - confirm there.
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return 0;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
// These are references to the step owners stored in the nodes, not copies:
// swapping them later exchanges the steps between parent_node and child_node.
auto & parent_step = parent_node->step;
|
||||
auto & child_step = child_node->step;
|
||||
auto * sorting_step = typeid_cast<SortingStep *>(parent_step.get());
|
||||
auto * expression_step = typeid_cast<ExpressionStep *>(child_step.get());
|
||||
|
||||
// The optimization applies only to Sorting directly above Expression.
if (!sorting_step || !expression_step)
|
||||
return 0;
|
||||
|
||||
// Collect the names of the columns the sorting is performed by.
NameSet sort_columns;
|
||||
for (const auto & col : sorting_step->getSortDescription())
|
||||
sort_columns.insert(col.column_name);
|
||||
auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
|
||||
|
||||
// No calculations can be postponed.
|
||||
if (unneeded_for_sorting->trivial())
|
||||
return 0;
|
||||
|
||||
// Sorting (parent_node) -> Expression (child_node)
|
||||
// Insert a new node between child_node and its former children.
auto & node_with_needed = nodes.emplace_back();
|
||||
std::swap(node_with_needed.children, child_node->children);
|
||||
child_node->children = {&node_with_needed};
|
||||
|
||||
// The new node has no step yet at this point; it receives the part of the expression needed for sorting.
node_with_needed.step = std::make_unique<ExpressionStep>(getChildOutputStream(node_with_needed), std::move(needed_for_sorting));
|
||||
node_with_needed.step->setStepDescription(child_step->getStepDescription());
|
||||
// Sorting (parent_node) -> still the original Expression (child_node) -> NeededCalculations (node_with_needed)
|
||||
|
||||
std::swap(parent_step, child_step);
|
||||
// still the original Expression (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)
|
||||
|
||||
// sorting_step still points to the same step object, which is now owned by child_node:
// rebind its input to the output of node_with_needed and rebuild its output stream from that header.
sorting_step->updateInputStream(getChildOutputStream(*child_node));
|
||||
auto input_header = sorting_step->getInputStreams().at(0).header;
|
||||
sorting_step->updateOutputStream(std::move(input_header));
|
||||
|
||||
// parent_step currently owns the original expression step; remember its description before replacing it.
auto description = parent_step->getStepDescription();
|
||||
parent_step = std::make_unique<DB::ExpressionStep>(child_step->getOutputStream(), std::move(unneeded_for_sorting));
|
||||
parent_step->setStepDescription(description + " [lifted up part]");
|
||||
// UnneededCalculations (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)
|
||||
|
||||
return 3;
|
||||
}
|
||||
}
|
@ -612,14 +612,8 @@ static void addMergingFinal(
|
||||
|
||||
ColumnNumbers key_columns;
|
||||
key_columns.reserve(sort_description.size());
|
||||
|
||||
for (const auto & desc : sort_description)
|
||||
{
|
||||
if (!desc.column_name.empty())
|
||||
key_columns.push_back(header.getPositionByName(desc.column_name));
|
||||
else
|
||||
key_columns.emplace_back(desc.column_number);
|
||||
}
|
||||
key_columns.push_back(header.getPositionByName(desc.column_name));
|
||||
|
||||
pipe.addSimpleTransform([&](const Block & stream_header)
|
||||
{
|
||||
@ -774,9 +768,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
|
||||
|
||||
Names partition_key_columns = metadata_for_reading->getPartitionKey().column_names;
|
||||
|
||||
const auto & header = pipe.getHeader();
|
||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
||||
sort_description.emplace_back(sort_columns[i], 1, 1);
|
||||
|
||||
addMergingFinal(
|
||||
pipe,
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include <stdexcept>
|
||||
#include <IO/Operators.h>
|
||||
#include <Processors/Merges/MergingSortedTransform.h>
|
||||
#include <Processors/QueryPlan/SortingStep.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Processors/Transforms/FinishSortingTransform.h>
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
#include <Processors/Transforms/MergeSortingTransform.h>
|
||||
#include <Processors/Transforms/PartialSortingTransform.h>
|
||||
#include <Processors/Transforms/FinishSortingTransform.h>
|
||||
#include <Processors/Merges/MergingSortedTransform.h>
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Common/JSONBuilder.h>
|
||||
|
||||
namespace DB
|
||||
@ -88,6 +89,18 @@ SortingStep::SortingStep(
|
||||
output_stream->sort_mode = DataStream::SortMode::Stream;
|
||||
}
|
||||
|
||||
/// Drops all current input streams of the step and installs `input_stream` as the only one.
void SortingStep::updateInputStream(DataStream input_stream)
{
    input_streams.clear();
    input_streams.push_back(std::move(input_stream));
}
|
||||
|
||||
/// Re-creates the output stream from the first input stream and `result_header`,
/// then passes the new header and distinct columns to updateDistinctColumns().
/// Throws (via at()) if the step has no input stream.
void SortingStep::updateOutputStream(Block result_header)
{
    auto & first_input = input_streams.at(0);
    output_stream = createOutputStream(first_input, std::move(result_header), getDataStreamTraits());

    auto & rebuilt_stream = *output_stream;
    updateDistinctColumns(rebuilt_stream.header, rebuilt_stream.distinct_columns);
}
|
||||
|
||||
void SortingStep::updateLimit(size_t limit_)
|
||||
{
|
||||
if (limit_ && (limit == 0 || limit_ < limit))
|
||||
@ -206,17 +219,17 @@ void SortingStep::describeActions(FormatSettings & settings) const
|
||||
if (!prefix_description.empty())
|
||||
{
|
||||
settings.out << prefix << "Prefix sort description: ";
|
||||
dumpSortDescription(prefix_description, input_streams.front().header, settings.out);
|
||||
dumpSortDescription(prefix_description, settings.out);
|
||||
settings.out << '\n';
|
||||
|
||||
settings.out << prefix << "Result sort description: ";
|
||||
dumpSortDescription(result_description, input_streams.front().header, settings.out);
|
||||
dumpSortDescription(result_description, settings.out);
|
||||
settings.out << '\n';
|
||||
}
|
||||
else
|
||||
{
|
||||
settings.out << prefix << "Sort description: ";
|
||||
dumpSortDescription(result_description, input_streams.front().header, settings.out);
|
||||
dumpSortDescription(result_description, settings.out);
|
||||
settings.out << '\n';
|
||||
}
|
||||
|
||||
@ -228,11 +241,11 @@ void SortingStep::describeActions(JSONBuilder::JSONMap & map) const
|
||||
{
|
||||
if (!prefix_description.empty())
|
||||
{
|
||||
map.add("Prefix Sort Description", explainSortDescription(prefix_description, input_streams.front().header));
|
||||
map.add("Result Sort Description", explainSortDescription(result_description, input_streams.front().header));
|
||||
map.add("Prefix Sort Description", explainSortDescription(prefix_description));
|
||||
map.add("Result Sort Description", explainSortDescription(result_description));
|
||||
}
|
||||
else
|
||||
map.add("Sort Description", explainSortDescription(result_description, input_streams.front().header));
|
||||
map.add("Sort Description", explainSortDescription(result_description));
|
||||
|
||||
if (limit)
|
||||
map.add("Limit", limit);
|
||||
|
@ -49,6 +49,11 @@ public:
|
||||
/// Add limit or change it to lower value.
|
||||
void updateLimit(size_t limit_);
|
||||
|
||||
void updateInputStream(DataStream input_stream);
|
||||
void updateOutputStream(Block result_header);
|
||||
|
||||
SortDescription getSortDescription() const { return result_description; }
|
||||
|
||||
private:
|
||||
|
||||
enum class Type
|
||||
|
@ -129,7 +129,7 @@ void WindowStep::describeActions(JSONBuilder::JSONMap & map) const
|
||||
}
|
||||
|
||||
if (!window_description.order_by.empty())
|
||||
map.add("Sort Description", explainSortDescription(window_description.order_by, {}));
|
||||
map.add("Sort Description", explainSortDescription(window_description.order_by));
|
||||
|
||||
auto functions_array = std::make_unique<JSONBuilder::JSONArray>();
|
||||
for (const auto & func : window_functions)
|
||||
|
@ -26,7 +26,6 @@ AggregatingInOrderTransform::AggregatingInOrderTransform(
|
||||
, max_block_size(max_block_size_)
|
||||
, max_block_bytes(max_block_bytes_)
|
||||
, params(std::move(params_))
|
||||
, group_by_description(group_by_description_)
|
||||
, aggregate_columns(params->params.aggregates_size)
|
||||
, many_data(std::move(many_data_))
|
||||
, variants(*many_data->variants[current_variant])
|
||||
@ -34,15 +33,8 @@ AggregatingInOrderTransform::AggregatingInOrderTransform(
|
||||
/// We won't finalize states in order to merge same states (generated due to multi-thread execution) in AggregatingSortedTransform
|
||||
res_header = params->getCustomHeader(false);
|
||||
|
||||
/// Replace column names to column position in description_sorted.
|
||||
for (auto & column_description : group_by_description)
|
||||
{
|
||||
if (!column_description.column_name.empty())
|
||||
{
|
||||
column_description.column_number = res_header.getPositionByName(column_description.column_name);
|
||||
column_description.column_name.clear();
|
||||
}
|
||||
}
|
||||
for (const auto & column_description : group_by_description_)
|
||||
group_by_description.emplace_back(column_description, res_header.getPositionByName(column_description.column_name));
|
||||
}
|
||||
|
||||
AggregatingInOrderTransform::~AggregatingInOrderTransform() = default;
|
||||
|
@ -51,7 +51,7 @@ private:
|
||||
MutableColumns res_aggregate_columns;
|
||||
|
||||
AggregatingTransformParamsPtr params;
|
||||
SortDescription group_by_description;
|
||||
SortDescriptionWithPositions group_by_description;
|
||||
|
||||
Aggregator::AggregateColumns aggregate_columns;
|
||||
|
||||
|
@ -12,33 +12,13 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
CheckSortedTransform::CheckSortedTransform(
|
||||
const Block & header_,
|
||||
const SortDescription & sort_description_)
|
||||
: ISimpleTransform(header_, header_, false)
|
||||
, sort_description_map(addPositionsToSortDescriptions(sort_description_))
|
||||
CheckSortedTransform::CheckSortedTransform(const Block & header, const SortDescription & sort_description)
|
||||
: ISimpleTransform(header, header, false)
|
||||
{
|
||||
for (const auto & column_description : sort_description)
|
||||
sort_description_map.emplace_back(column_description, header.getPositionByName(column_description.column_name));
|
||||
}
|
||||
|
||||
SortDescriptionsWithPositions
|
||||
CheckSortedTransform::addPositionsToSortDescriptions(const SortDescription & sort_description)
|
||||
{
|
||||
SortDescriptionsWithPositions result;
|
||||
result.reserve(sort_description.size());
|
||||
const auto & header = getInputPort().getHeader();
|
||||
|
||||
for (SortColumnDescription description_copy : sort_description)
|
||||
{
|
||||
if (!description_copy.column_name.empty())
|
||||
description_copy.column_number = header.getPositionByName(description_copy.column_name);
|
||||
|
||||
result.push_back(description_copy);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
void CheckSortedTransform::transform(Chunk & chunk)
|
||||
{
|
||||
size_t num_rows = chunk.getNumRows();
|
||||
@ -54,7 +34,7 @@ void CheckSortedTransform::transform(Chunk & chunk)
|
||||
const IColumn * left_col = left[column_number].get();
|
||||
const IColumn * right_col = right[column_number].get();
|
||||
|
||||
int res = elem.direction * left_col->compareAt(left_index, right_index, *right_col, elem.nulls_direction);
|
||||
int res = elem.base.direction * left_col->compareAt(left_index, right_index, *right_col, elem.base.nulls_direction);
|
||||
if (res < 0)
|
||||
{
|
||||
return;
|
||||
|
@ -5,16 +5,12 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using SortDescriptionsWithPositions = std::vector<SortColumnDescription>;
|
||||
|
||||
/// Checks that the flow of blocks is sorted in the sort_description order.
|
||||
/// Otherwise throws exception in readImpl function.
|
||||
class CheckSortedTransform : public ISimpleTransform
|
||||
{
|
||||
public:
|
||||
CheckSortedTransform(
|
||||
const Block & header_,
|
||||
const SortDescription & sort_description_);
|
||||
CheckSortedTransform(const Block & header, const SortDescription & sort_description);
|
||||
|
||||
String getName() const override { return "CheckSortedTransform"; }
|
||||
|
||||
@ -23,10 +19,7 @@ protected:
|
||||
void transform(Chunk & chunk) override;
|
||||
|
||||
private:
|
||||
SortDescriptionsWithPositions sort_description_map;
|
||||
SortDescriptionWithPositions sort_description_map;
|
||||
Columns last_row;
|
||||
|
||||
/// Just checks, that all sort_descriptions has column_number
|
||||
SortDescriptionsWithPositions addPositionsToSortDescriptions(const SortDescription & sort_description);
|
||||
};
|
||||
}
|
||||
|
@ -9,8 +9,9 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
DistinctSortedTransform::DistinctSortedTransform(
|
||||
const Block & header, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns)
|
||||
: ISimpleTransform(header, header, true)
|
||||
Block header_, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns)
|
||||
: ISimpleTransform(header_, header_, true)
|
||||
, header(std::move(header_))
|
||||
, description(std::move(sort_description))
|
||||
, columns_names(columns)
|
||||
, limit_hint(limit_hint_)
|
||||
@ -24,7 +25,7 @@ void DistinctSortedTransform::transform(Chunk & chunk)
|
||||
if (column_ptrs.empty())
|
||||
return;
|
||||
|
||||
ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs));
|
||||
ColumnRawPtrs clearing_hint_columns(getClearingColumns(column_ptrs));
|
||||
|
||||
if (data.type == ClearableSetVariants::Type::EMPTY)
|
||||
data.init(ClearableSetVariants::chooseMethod(column_ptrs, key_sizes));
|
||||
@ -139,13 +140,13 @@ ColumnRawPtrs DistinctSortedTransform::getKeyColumns(const Chunk & chunk) const
|
||||
return column_ptrs;
|
||||
}
|
||||
|
||||
ColumnRawPtrs DistinctSortedTransform::getClearingColumns(const Chunk & chunk, const ColumnRawPtrs & key_columns) const
|
||||
ColumnRawPtrs DistinctSortedTransform::getClearingColumns(const ColumnRawPtrs & key_columns) const
|
||||
{
|
||||
ColumnRawPtrs clearing_hint_columns;
|
||||
clearing_hint_columns.reserve(description.size());
|
||||
for (const auto & sort_column_description : description)
|
||||
{
|
||||
const auto * sort_column_ptr = chunk.getColumns().at(sort_column_description.column_number).get();
|
||||
const auto * sort_column_ptr = header.getByName(sort_column_description.column_name).column.get();
|
||||
const auto it = std::find(key_columns.cbegin(), key_columns.cend(), sort_column_ptr);
|
||||
if (it != key_columns.cend()) /// if found in key_columns
|
||||
clearing_hint_columns.emplace_back(sort_column_ptr);
|
||||
|
@ -22,7 +22,8 @@ class DistinctSortedTransform : public ISimpleTransform
|
||||
{
|
||||
public:
|
||||
/// Empty columns_ means all columns.
|
||||
DistinctSortedTransform(const Block & header, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns);
|
||||
DistinctSortedTransform(
|
||||
Block header_, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns);
|
||||
|
||||
String getName() const override { return "DistinctSortedTransform"; }
|
||||
|
||||
@ -33,7 +34,7 @@ private:
|
||||
ColumnRawPtrs getKeyColumns(const Chunk & chunk) const;
|
||||
/// When clearing_columns change, we can clear the HashSet as a memory optimization.
|
||||
/// clearing_columns is the left-prefix of SortDescription that exists in key_columns
|
||||
ColumnRawPtrs getClearingColumns(const Chunk & chunk, const ColumnRawPtrs & key_columns) const;
|
||||
ColumnRawPtrs getClearingColumns(const ColumnRawPtrs & key_columns) const;
|
||||
static bool rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m);
|
||||
|
||||
/// return true if has new data
|
||||
@ -46,6 +47,7 @@ private:
|
||||
size_t rows,
|
||||
ClearableSetVariants & variants) const;
|
||||
|
||||
Block header;
|
||||
SortDescription description;
|
||||
|
||||
struct PreviousChunk
|
||||
|
@ -21,9 +21,11 @@ static bool isPrefix(const SortDescription & pref_descr, const SortDescription &
|
||||
}
|
||||
|
||||
FinishSortingTransform::FinishSortingTransform(
|
||||
const Block & header, const SortDescription & description_sorted_,
|
||||
const Block & header,
|
||||
const SortDescription & description_sorted_,
|
||||
const SortDescription & description_to_sort_,
|
||||
size_t max_merged_block_size_, UInt64 limit_)
|
||||
size_t max_merged_block_size_,
|
||||
UInt64 limit_)
|
||||
: SortingTransform(header, description_to_sort_, max_merged_block_size_, limit_)
|
||||
{
|
||||
/// Check for sanity non-modified descriptions
|
||||
@ -34,7 +36,8 @@ FinishSortingTransform::FinishSortingTransform(
|
||||
/// The target description is modified in SortingTransform constructor.
|
||||
/// To avoid doing the same actions with description_sorted just copy it from prefix of target description.
|
||||
size_t prefix_size = description_sorted_.size();
|
||||
description_sorted.assign(description.begin(), description.begin() + prefix_size);
|
||||
for (size_t i = 0; i < prefix_size; ++i)
|
||||
description_with_positions.emplace_back(description[i], header_without_constants.getPositionByName(description[i].column_name));
|
||||
}
|
||||
|
||||
void FinishSortingTransform::consume(Chunk chunk)
|
||||
@ -62,7 +65,7 @@ void FinishSortingTransform::consume(Chunk chunk)
|
||||
while (high - low > 1)
|
||||
{
|
||||
ssize_t mid = (low + high) / 2;
|
||||
if (!less(last_chunk.getColumns(), chunk.getColumns(), last_chunk.getNumRows() - 1, mid, description_sorted))
|
||||
if (!less(last_chunk.getColumns(), chunk.getColumns(), last_chunk.getNumRows() - 1, mid, description_with_positions))
|
||||
low = mid;
|
||||
else
|
||||
high = mid;
|
||||
@ -100,7 +103,8 @@ void FinishSortingTransform::generate()
|
||||
{
|
||||
if (!merge_sorter)
|
||||
{
|
||||
merge_sorter = std::make_unique<MergeSorter>(std::move(chunks), description, max_merged_block_size, limit);
|
||||
merge_sorter
|
||||
= std::make_unique<MergeSorter>(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
|
||||
generated_prefix = true;
|
||||
}
|
||||
|
||||
|
@ -11,9 +11,12 @@ class FinishSortingTransform : public SortingTransform
|
||||
{
|
||||
public:
|
||||
/// limit - if not 0, allowed to return just first 'limit' rows in sorted order.
|
||||
FinishSortingTransform(const Block & header, const SortDescription & description_sorted_,
|
||||
FinishSortingTransform(
|
||||
const Block & header,
|
||||
const SortDescription & description_sorted_,
|
||||
const SortDescription & description_to_sort_,
|
||||
size_t max_merged_block_size_, UInt64 limit_);
|
||||
size_t max_merged_block_size_,
|
||||
UInt64 limit_);
|
||||
|
||||
String getName() const override { return "FinishSortingTransform"; }
|
||||
|
||||
@ -22,7 +25,7 @@ protected:
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
SortDescription description_sorted;
|
||||
SortDescriptionWithPositions description_with_positions;
|
||||
|
||||
Chunk tail_chunk;
|
||||
};
|
||||
|
@ -90,16 +90,21 @@ private:
|
||||
MergeSortingTransform::MergeSortingTransform(
|
||||
const Block & header,
|
||||
const SortDescription & description_,
|
||||
size_t max_merged_block_size_, UInt64 limit_,
|
||||
size_t max_merged_block_size_,
|
||||
UInt64 limit_,
|
||||
size_t max_bytes_before_remerge_,
|
||||
double remerge_lowered_memory_bytes_ratio_,
|
||||
size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_,
|
||||
size_t max_bytes_before_external_sort_,
|
||||
VolumePtr tmp_volume_,
|
||||
size_t min_free_disk_space_)
|
||||
: SortingTransform(header, description_, max_merged_block_size_, limit_)
|
||||
, max_bytes_before_remerge(max_bytes_before_remerge_)
|
||||
, remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_)
|
||||
, max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_)
|
||||
, min_free_disk_space(min_free_disk_space_) {}
|
||||
, max_bytes_before_external_sort(max_bytes_before_external_sort_)
|
||||
, tmp_volume(tmp_volume_)
|
||||
, min_free_disk_space(min_free_disk_space_)
|
||||
{
|
||||
}
|
||||
|
||||
Processors MergeSortingTransform::expandPipeline()
|
||||
{
|
||||
@ -180,7 +185,8 @@ void MergeSortingTransform::consume(Chunk chunk)
|
||||
temporary_files.emplace_back(createTemporaryFile(tmp_path));
|
||||
|
||||
const std::string & path = temporary_files.back()->path();
|
||||
merge_sorter = std::make_unique<MergeSorter>(std::move(chunks), description, max_merged_block_size, limit);
|
||||
merge_sorter
|
||||
= std::make_unique<MergeSorter>(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
|
||||
auto current_processor = std::make_shared<BufferingToFileTransform>(header_without_constants, log, path);
|
||||
|
||||
processors.emplace_back(current_processor);
|
||||
@ -223,7 +229,8 @@ void MergeSortingTransform::generate()
|
||||
if (!generated_prefix)
|
||||
{
|
||||
if (temporary_files.empty())
|
||||
merge_sorter = std::make_unique<MergeSorter>(std::move(chunks), description, max_merged_block_size, limit);
|
||||
merge_sorter
|
||||
= std::make_unique<MergeSorter>(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::ExternalSortMerge);
|
||||
@ -251,7 +258,7 @@ void MergeSortingTransform::remerge()
|
||||
LOG_DEBUG(log, "Re-merging intermediate ORDER BY data ({} blocks with {} rows) to save memory consumption", chunks.size(), sum_rows_in_blocks);
|
||||
|
||||
/// NOTE Maybe concat all blocks and partial sort will be faster than merge?
|
||||
MergeSorter remerge_sorter(std::move(chunks), description, max_merged_block_size, limit);
|
||||
MergeSorter remerge_sorter(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
|
||||
|
||||
Chunks new_chunks;
|
||||
size_t new_sum_rows_in_blocks = 0;
|
||||
|
@ -18,13 +18,16 @@ class MergeSortingTransform : public SortingTransform
|
||||
{
|
||||
public:
|
||||
/// limit - if not 0, allowed to return just first 'limit' rows in sorted order.
|
||||
MergeSortingTransform(const Block & header,
|
||||
const SortDescription & description_,
|
||||
size_t max_merged_block_size_, UInt64 limit_,
|
||||
size_t max_bytes_before_remerge_,
|
||||
double remerge_lowered_memory_bytes_ratio_,
|
||||
size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_,
|
||||
size_t min_free_disk_space_);
|
||||
MergeSortingTransform(
|
||||
const Block & header,
|
||||
const SortDescription & description_,
|
||||
size_t max_merged_block_size_,
|
||||
UInt64 limit_,
|
||||
size_t max_bytes_before_remerge_,
|
||||
double remerge_lowered_memory_bytes_ratio_,
|
||||
size_t max_bytes_before_external_sort_,
|
||||
VolumePtr tmp_volume_,
|
||||
size_t min_free_disk_space_);
|
||||
|
||||
String getName() const override { return "MergeSortingTransform"; }
|
||||
|
||||
|
@ -22,9 +22,7 @@ static ColumnRawPtrs extractColumns(const Block & block, const SortDescription &
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const IColumn * column = !description[i].column_name.empty()
|
||||
? block.getByName(description[i].column_name).column.get()
|
||||
: block.safeGetByPosition(description[i].column_number).column.get();
|
||||
const IColumn * column = block.getByName(description[i].column_name).column.get();
|
||||
res.emplace_back(column);
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
MergeSorter::MergeSorter(Chunks chunks_, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_)
|
||||
MergeSorter::MergeSorter(const Block & header, Chunks chunks_, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_)
|
||||
: chunks(std::move(chunks_)), description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_)
|
||||
{
|
||||
Chunks nonempty_chunks;
|
||||
@ -36,7 +36,7 @@ MergeSorter::MergeSorter(Chunks chunks_, SortDescription & description_, size_t
|
||||
/// which can be inefficient.
|
||||
convertToFullIfSparse(chunk);
|
||||
|
||||
cursors.emplace_back(chunk.getColumns(), description);
|
||||
cursors.emplace_back(header, chunk.getColumns(), description);
|
||||
has_collation |= cursors.back().has_collation;
|
||||
|
||||
nonempty_chunks.emplace_back(std::move(chunk));
|
||||
@ -139,16 +139,6 @@ SortingTransform::SortingTransform(
|
||||
{
|
||||
const auto & sample = inputs.front().getHeader();
|
||||
|
||||
/// Replace column names to column position in sort_description.
|
||||
for (auto & column_description : description)
|
||||
{
|
||||
if (!column_description.column_name.empty())
|
||||
{
|
||||
column_description.column_number = sample.getPositionByName(column_description.column_name);
|
||||
column_description.column_name.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove constants from header and map old indexes to new.
|
||||
size_t num_columns = sample.columns();
|
||||
ColumnNumbers map(num_columns, num_columns);
|
||||
@ -169,13 +159,10 @@ SortingTransform::SortingTransform(
|
||||
description_without_constants.reserve(description.size());
|
||||
for (const auto & column_description : description)
|
||||
{
|
||||
auto old_pos = column_description.column_number;
|
||||
auto old_pos = header.getPositionByName(column_description.column_name);
|
||||
auto new_pos = map[old_pos];
|
||||
if (new_pos < num_columns)
|
||||
{
|
||||
description_without_constants.push_back(column_description);
|
||||
description_without_constants.back().column_number = new_pos;
|
||||
}
|
||||
}
|
||||
|
||||
description.swap(description_without_constants);
|
||||
|
@ -15,7 +15,7 @@ namespace DB
|
||||
class MergeSorter
|
||||
{
|
||||
public:
|
||||
MergeSorter(Chunks chunks_, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_);
|
||||
MergeSorter(const Block & header, Chunks chunks_, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_);
|
||||
|
||||
Chunk read();
|
||||
|
||||
@ -45,8 +45,10 @@ private:
|
||||
class MergeSorterSource : public ISource
|
||||
{
|
||||
public:
|
||||
MergeSorterSource(Block header, Chunks chunks, SortDescription & description, size_t max_merged_block_size, UInt64 limit)
|
||||
: ISource(std::move(header)), merge_sorter(std::move(chunks), description, max_merged_block_size, limit) {}
|
||||
MergeSorterSource(const Block & header, Chunks chunks, SortDescription & description, size_t max_merged_block_size, UInt64 limit)
|
||||
: ISource(header), merge_sorter(header, std::move(chunks), description, max_merged_block_size, limit)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return "MergeSorterSource"; }
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/ResizeProcessor.h>
|
||||
#include <Processors/LimitTransform.h>
|
||||
#include <Processors/Transforms/TotalsHavingTransform.h>
|
||||
@ -307,7 +308,15 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
|
||||
right->pipe.dropExtremes();
|
||||
|
||||
left->pipe.collected_processors = collected_processors;
|
||||
right->pipe.collected_processors = collected_processors;
|
||||
|
||||
/// Collect the NEW processors for the right pipeline.
|
||||
QueryPipelineProcessorsCollector collector(*right);
|
||||
/// Remember the last step of the right pipeline.
|
||||
ExpressionStep* step = typeid_cast<ExpressionStep*>(right->pipe.processors.back()->getQueryPlanStep());
|
||||
if (!step)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The top step of the right pipeline should be ExpressionStep");
|
||||
}
|
||||
|
||||
/// In case joined subquery has totals, and we don't, add default chunk to totals.
|
||||
bool default_totals = false;
|
||||
@ -377,6 +386,10 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
|
||||
left->pipe.processors.emplace_back(std::move(joining));
|
||||
}
|
||||
|
||||
/// Move the collected processors to the last step in the right pipeline.
|
||||
Processors processors = collector.detachProcessors();
|
||||
step->appendExtraProcessors(processors);
|
||||
|
||||
left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end());
|
||||
left->pipe.holder = std::move(right->pipe.holder);
|
||||
left->pipe.header = left->pipe.output_ports.front()->getHeader();
|
||||
|
@ -696,22 +696,24 @@ namespace
|
||||
/// The function works for Arrays and Nullables of the same structure.
|
||||
bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to)
|
||||
{
|
||||
if (from->equals(*to))
|
||||
return true;
|
||||
|
||||
if (const auto * from_enum8 = typeid_cast<const DataTypeEnum8 *>(from))
|
||||
auto is_compatible_enum_types_conversion = [](const IDataType * from_type, const IDataType * to_type)
|
||||
{
|
||||
if (const auto * to_enum8 = typeid_cast<const DataTypeEnum8 *>(to))
|
||||
return to_enum8->contains(*from_enum8);
|
||||
}
|
||||
if (const auto * from_enum8 = typeid_cast<const DataTypeEnum8 *>(from_type))
|
||||
{
|
||||
if (const auto * to_enum8 = typeid_cast<const DataTypeEnum8 *>(to_type))
|
||||
return to_enum8->contains(*from_enum8);
|
||||
}
|
||||
|
||||
if (const auto * from_enum16 = typeid_cast<const DataTypeEnum16 *>(from))
|
||||
{
|
||||
if (const auto * to_enum16 = typeid_cast<const DataTypeEnum16 *>(to))
|
||||
return to_enum16->contains(*from_enum16);
|
||||
}
|
||||
if (const auto * from_enum16 = typeid_cast<const DataTypeEnum16 *>(from_type))
|
||||
{
|
||||
if (const auto * to_enum16 = typeid_cast<const DataTypeEnum16 *>(to_type))
|
||||
return to_enum16->contains(*from_enum16);
|
||||
}
|
||||
|
||||
static const std::unordered_multimap<std::type_index, const std::type_info &> ALLOWED_CONVERSIONS =
|
||||
return false;
|
||||
};
|
||||
|
||||
static const std::unordered_multimap<std::type_index, const std::type_info &> allowed_conversions =
|
||||
{
|
||||
{ typeid(DataTypeEnum8), typeid(DataTypeInt8) },
|
||||
{ typeid(DataTypeEnum16), typeid(DataTypeInt16) },
|
||||
@ -721,12 +723,19 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to)
|
||||
{ typeid(DataTypeUInt16), typeid(DataTypeDate) },
|
||||
};
|
||||
|
||||
/// Unwrap some nested types and check for valid conversions
|
||||
while (true)
|
||||
{
|
||||
/// types are equal, obviously pure metadata alter
|
||||
if (from->equals(*to))
|
||||
return true;
|
||||
|
||||
auto it_range = ALLOWED_CONVERSIONS.equal_range(typeid(*from));
|
||||
/// We are just adding something to the enum, nothing changes on disk
|
||||
if (is_compatible_enum_types_conversion(from, to))
|
||||
return true;
|
||||
|
||||
/// Types changed, but representation on disk didn't
|
||||
auto it_range = allowed_conversions.equal_range(typeid(*from));
|
||||
for (auto it = it_range.first; it != it_range.second; ++it)
|
||||
{
|
||||
if (it->second == typeid(*to))
|
||||
@ -1046,8 +1055,12 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPt
|
||||
if (!all_columns.has(column_name))
|
||||
{
|
||||
if (!command.if_exists)
|
||||
throw Exception{"Wrong column name. Cannot find column " + backQuote(column_name) + " to modify",
|
||||
ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK};
|
||||
{
|
||||
String exception_message = fmt::format("Wrong column. Cannot find column {} to modify", backQuote(column_name));
|
||||
all_columns.appendHintsMessage(exception_message, column_name);
|
||||
throw Exception{exception_message,
|
||||
ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK};
|
||||
}
|
||||
else
|
||||
continue;
|
||||
}
|
||||
@ -1152,17 +1165,22 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPt
|
||||
all_columns.remove(command.column_name);
|
||||
}
|
||||
else if (!command.if_exists)
|
||||
throw Exception(
|
||||
"Wrong column name. Cannot find column " + backQuote(command.column_name) + " to drop",
|
||||
ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
{
|
||||
String exception_message = fmt::format("Wrong column name. Cannot find column {} to drop", backQuote(command.column_name));
|
||||
all_columns.appendHintsMessage(exception_message, command.column_name);
|
||||
throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
}
|
||||
}
|
||||
else if (command.type == AlterCommand::COMMENT_COLUMN)
|
||||
{
|
||||
if (!all_columns.has(command.column_name))
|
||||
{
|
||||
if (!command.if_exists)
|
||||
throw Exception{"Wrong column name. Cannot find column " + backQuote(command.column_name) + " to comment",
|
||||
ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK};
|
||||
{
|
||||
String exception_message = fmt::format("Wrong column name. Cannot find column {} to comment", backQuote(command.column_name));
|
||||
all_columns.appendHintsMessage(exception_message, command.column_name);
|
||||
throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (command.type == AlterCommand::MODIFY_SETTING || command.type == AlterCommand::RESET_SETTING)
|
||||
@ -1196,8 +1214,11 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPt
|
||||
if (!all_columns.has(command.column_name))
|
||||
{
|
||||
if (!command.if_exists)
|
||||
throw Exception{"Wrong column name. Cannot find column " + backQuote(command.column_name) + " to rename",
|
||||
ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK};
|
||||
{
|
||||
String exception_message = fmt::format("Wrong column name. Cannot find column {} to rename", backQuote(command.column_name));
|
||||
all_columns.appendHintsMessage(exception_message, command.column_name);
|
||||
throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
}
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
@ -122,7 +122,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
|
||||
if (col_ast->default_expression)
|
||||
{
|
||||
default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier);
|
||||
default_desc.expression = col_ast->default_expression;
|
||||
default_desc.expression = std::move(col_ast->default_expression);
|
||||
}
|
||||
|
||||
if (col_ast->comment)
|
||||
@ -230,8 +230,11 @@ void ColumnsDescription::remove(const String & column_name)
|
||||
{
|
||||
auto range = getNameRange(columns, column_name);
|
||||
if (range.first == range.second)
|
||||
throw Exception("There is no column " + column_name + " in table.",
|
||||
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||
{
|
||||
String exception_message = fmt::format("There is no column {} in table", column_name);
|
||||
appendHintsMessage(exception_message, column_name);
|
||||
throw Exception(exception_message, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||
}
|
||||
|
||||
for (auto list_it = range.first; list_it != range.second;)
|
||||
{
|
||||
@ -244,7 +247,11 @@ void ColumnsDescription::rename(const String & column_from, const String & colum
|
||||
{
|
||||
auto it = columns.get<1>().find(column_from);
|
||||
if (it == columns.get<1>().end())
|
||||
throw Exception("Cannot find column " + column_from + " in ColumnsDescription", ErrorCodes::LOGICAL_ERROR);
|
||||
{
|
||||
String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_from);
|
||||
appendHintsMessage(exception_message, column_from);
|
||||
throw Exception(exception_message, ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
columns.get<1>().modify_key(it, [&column_to] (String & old_name)
|
||||
{
|
||||
@ -745,6 +752,18 @@ void ColumnsDescription::removeSubcolumns(const String & name_in_storage)
|
||||
subcolumns.get<1>().erase(range.first, range.second);
|
||||
}
|
||||
|
||||
/// Returns the names of all stored columns whose name does not contain a '.' character.
/// NOTE(review): presumably this is the candidate list used to build "did you mean" hints
/// via appendHintsMessage — confirm against the IHints interface.
std::vector<String> ColumnsDescription::getAllRegisteredNames() const
|
||||
{
|
||||
std::vector<String> names;
|
||||
/// Reserve for the upper bound: names containing a dot are skipped below, so fewer may be stored.
names.reserve(columns.size());
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
/// Keep only names without a dot (presumably dotted names are nested-style columns — TODO confirm).
if (column.name.find('.') == std::string::npos)
|
||||
names.push_back(column.name);
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context)
|
||||
{
|
||||
for (const auto & child : default_expr_list->children)
|
||||
|
@ -91,7 +91,7 @@ struct ColumnDescription
|
||||
|
||||
|
||||
/// Description of multiple table columns (in CREATE TABLE for example).
|
||||
class ColumnsDescription
|
||||
class ColumnsDescription : public IHints<1, ColumnsDescription>
|
||||
{
|
||||
public:
|
||||
ColumnsDescription() = default;
|
||||
@ -149,7 +149,11 @@ public:
|
||||
{
|
||||
auto it = columns.get<1>().find(column_name);
|
||||
if (it == columns.get<1>().end())
|
||||
throw Exception("Cannot find column " + column_name + " in ColumnsDescription", ErrorCodes::LOGICAL_ERROR);
|
||||
{
|
||||
String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_name);
|
||||
appendHintsMessage(exception_message, column_name);
|
||||
throw Exception(exception_message, ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
removeSubcolumns(it->name);
|
||||
if (!columns.get<1>().modify(it, std::forward<F>(f)))
|
||||
@ -196,6 +200,8 @@ public:
|
||||
return columns.empty();
|
||||
}
|
||||
|
||||
std::vector<String> getAllRegisteredNames() const override;
|
||||
|
||||
/// Keep the sequence of columns and allow to lookup by name.
|
||||
using ColumnsContainer = boost::multi_index_container<
|
||||
ColumnDescription,
|
||||
|
@ -74,7 +74,6 @@ struct IndicesDescription : public std::vector<IndexDescription>, IHints<1, Indi
|
||||
/// Return common expression for all stored indices
|
||||
ExpressionActionsPtr getSingleExpressionForIndices(const ColumnsDescription & columns, ContextPtr context) const;
|
||||
|
||||
public:
|
||||
Names getAllRegisteredNames() const override;
|
||||
};
|
||||
|
||||
|
@ -779,11 +779,16 @@ void registerStorageKafka(StorageFactory & factory)
|
||||
#undef CHECK_KAFKA_STORAGE_ARGUMENT
|
||||
|
||||
auto num_consumers = kafka_settings->kafka_num_consumers.value;
|
||||
auto physical_cpu_cores = getNumberOfPhysicalCPUCores();
|
||||
auto max_consumers = std::max<uint32_t>(getNumberOfPhysicalCPUCores(), 16);
|
||||
|
||||
if (num_consumers > physical_cpu_cores)
|
||||
if (num_consumers > max_consumers)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be bigger than {}", physical_cpu_cores);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The number of consumers can not be bigger than {}. "
|
||||
"A single consumer can read any number of partitions. Extra consumers are relatively expensive, "
|
||||
"and using a lot of them can lead to high memory and CPU usage. To achieve better performance "
|
||||
"of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, "
|
||||
"and ensure you have enough threads in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). "
|
||||
"See also https://clickhouse.com/docs/integrations/kafka/kafka-table-engine#tuning-performance", max_consumers);
|
||||
}
|
||||
else if (num_consumers < 1)
|
||||
{
|
||||
|
@ -782,7 +782,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
|
||||
|
||||
Block header = pipes.at(0).getHeader();
|
||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
||||
sort_description.emplace_back(sort_columns[i], 1, 1);
|
||||
|
||||
/// The order of the streams is important: when the key is matched, the elements go in the order of the source stream number.
|
||||
/// In the merged part, the lines with the same key must be in the ascending order of the identifier of original part,
|
||||
|
@ -1909,6 +1909,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
|
||||
StorageInMemoryMetadata old_metadata = getInMemoryMetadata();
|
||||
|
||||
const auto & settings = local_context->getSettingsRef();
|
||||
const auto & settings_from_storage = getSettings();
|
||||
|
||||
if (!settings.allow_non_metadata_alters)
|
||||
{
|
||||
@ -2099,6 +2100,14 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
|
||||
|
||||
dropped_columns.emplace(command.column_name);
|
||||
}
|
||||
else if (command.type == AlterCommand::RESET_SETTING)
|
||||
{
|
||||
for (const auto & reset_setting : command.settings_resets)
|
||||
{
|
||||
if (!settings_from_storage->has(reset_setting))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot reset setting '{}' because it doesn't exist for MergeTree engines family", reset_setting);
|
||||
}
|
||||
}
|
||||
else if (command.isRequireMutationStage(getInMemoryMetadata()))
|
||||
{
|
||||
/// This alter will override data on disk. Let's check that it doesn't
|
||||
@ -2953,7 +2962,8 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
|
||||
{
|
||||
auto lock = lockParts();
|
||||
|
||||
LOG_TRACE(log, "Trying to immediately remove part {}", part->getNameWithState());
|
||||
auto part_name_with_state = part->getNameWithState();
|
||||
LOG_TRACE(log, "Trying to immediately remove part {}", part_name_with_state);
|
||||
|
||||
if (part->getState() != DataPartState::Temporary)
|
||||
{
|
||||
@ -2964,7 +2974,16 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
|
||||
part.reset();
|
||||
|
||||
if (!((*it)->getState() == DataPartState::Outdated && it->unique()))
|
||||
{
|
||||
if ((*it)->getState() != DataPartState::Outdated)
|
||||
LOG_WARNING(log, "Cannot immediately remove part {} because it's not in Outdated state "
|
||||
"usage counter {}", part_name_with_state, it->use_count());
|
||||
|
||||
if (!it->unique())
|
||||
LOG_WARNING(log, "Cannot immediately remove part {} because someone using it right now "
|
||||
"usage counter {}", part_name_with_state, it->use_count());
|
||||
return;
|
||||
}
|
||||
|
||||
modifyPartState(it, DataPartState::Deleting);
|
||||
|
||||
@ -3375,7 +3394,12 @@ void MergeTreeData::checkAlterPartitionIsPossible(
|
||||
void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition)
|
||||
{
|
||||
const String partition_id = getPartitionIDFromQuery(partition, getContext());
|
||||
auto parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id);
|
||||
DataPartsVector parts_to_remove;
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
if (partition_ast && partition_ast->all)
|
||||
parts_to_remove = getDataPartsVector();
|
||||
else
|
||||
parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id);
|
||||
|
||||
UInt64 partition_size = 0;
|
||||
|
||||
@ -3826,6 +3850,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
|
||||
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
const Block & key_sample_block = metadata_snapshot->getPartitionKey().sample_block;
|
||||
if (partition_ast.all)
|
||||
return "ALL";
|
||||
size_t fields_count = key_sample_block.columns();
|
||||
if (partition_ast.fields_count != fields_count)
|
||||
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
|
||||
|
@ -81,7 +81,7 @@ struct MergeTreeDataPartTTLInfos
|
||||
/// Reports whether this object carries no TTL information at all:
/// part_min_ttl evaluates to false and every checked TTL collection is empty.
/// NOTE(review): this excerpt is a rendered diff without +/- markers; the two
/// consecutive return rows below appear to be the pre-change line and its
/// replacement (which additionally checks columns_ttl) — confirm against the real file.
bool empty() const
|
||||
{
|
||||
/// part_min_ttl is the minimum of rows, rows_where and group_by TTLs
|
||||
return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty();
|
||||
return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty() && columns_ttl.empty();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -145,7 +145,7 @@ void MergeTreeDataWriter::TemporaryPart::finalize()
|
||||
}
|
||||
|
||||
BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
|
||||
const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context)
|
||||
const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context)
|
||||
{
|
||||
BlocksWithPartition result;
|
||||
if (!block || !block.rows())
|
||||
@ -282,16 +282,12 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
|
||||
{
|
||||
TemporaryPart temp_part;
|
||||
Block & block = block_with_partition.block;
|
||||
|
||||
auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames());
|
||||
auto storage_snapshot = data.getStorageSnapshot(metadata_snapshot);
|
||||
|
||||
if (!storage_snapshot->object_columns.empty())
|
||||
{
|
||||
auto extended_storage_columns = storage_snapshot->getColumns(
|
||||
GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects());
|
||||
|
||||
convertObjectsToTuples(columns, block, extended_storage_columns);
|
||||
}
|
||||
for (auto & column : columns)
|
||||
if (isObject(column.type))
|
||||
column.type = block.getByName(column.name).type;
|
||||
|
||||
static const String TMP_PREFIX = "tmp_insert_";
|
||||
|
||||
@ -333,7 +329,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
|
||||
sort_description.reserve(sort_columns_size);
|
||||
|
||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||
sort_description.emplace_back(block.getPositionByName(sort_columns[i]), 1, 1);
|
||||
sort_description.emplace_back(sort_columns[i], 1, 1);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocks);
|
||||
|
||||
@ -466,6 +462,16 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
|
||||
return temp_part;
|
||||
}
|
||||
|
||||
/// Converts object columns of `block` in place, using the column set of `storage_snapshot`.
/// Does nothing when the snapshot has no object columns.
/// `block` is taken by non-const reference and handed to convertObjectsToTuples.
/// NOTE(review): presumably this replaces Object-typed columns with concrete Tuple types — confirm
/// against convertObjectsToTuples, which is not visible here.
void MergeTreeDataWriter::deduceTypesOfObjectColumns(const StorageSnapshotPtr & storage_snapshot, Block & block)
|
||||
{
|
||||
/// Skip the column lookup entirely when the snapshot has no object columns.
if (!storage_snapshot->object_columns.empty())
|
||||
{
|
||||
/// Request all physical columns, with the "extended objects" option enabled.
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects();
|
||||
auto storage_columns = storage_snapshot->getColumns(options);
|
||||
convertObjectsToTuples(block, storage_columns);
|
||||
}
|
||||
}
|
||||
|
||||
MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
|
||||
const String & part_name,
|
||||
MergeTreeDataPartType part_type,
|
||||
@ -521,7 +527,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
|
||||
sort_description.reserve(sort_columns_size);
|
||||
|
||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||
sort_description.emplace_back(block.getPositionByName(sort_columns[i]), 1, 1);
|
||||
sort_description.emplace_back(sort_columns[i], 1, 1);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocks);
|
||||
|
||||
|
@ -42,14 +42,12 @@ public:
|
||||
*/
|
||||
static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context);
|
||||
|
||||
/** All rows must correspond to same partition.
|
||||
* Returns part with unique name starting with 'tmp_', yet not added to MergeTreeData.
|
||||
*/
|
||||
MergeTreeData::MutableDataPartPtr writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, bool optimize_on_insert);
|
||||
static void deduceTypesOfObjectColumns(const StorageSnapshotPtr & storage_snapshot, Block & block);
|
||||
|
||||
/// This structure contains not completely written temporary part.
|
||||
/// Some writes may happen asynchronously, e.g. for blob storages.
|
||||
/// You should call finalize() to wait until all data is written.
|
||||
|
||||
struct TemporaryPart
|
||||
{
|
||||
MergeTreeData::MutableDataPartPtr part;
|
||||
@ -65,6 +63,9 @@ public:
|
||||
void finalize();
|
||||
};
|
||||
|
||||
/** All rows must correspond to same partition.
|
||||
* Returns part with unique name starting with 'tmp_', yet not added to MergeTreeData.
|
||||
*/
|
||||
TemporaryPart writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context);
|
||||
|
||||
/// For insertion.
|
||||
|
@ -50,7 +50,9 @@ struct MergeTreeSink::DelayedChunk
|
||||
void MergeTreeSink::consume(Chunk chunk)
|
||||
{
|
||||
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
|
||||
auto storage_snapshot = storage.getStorageSnapshot(metadata_snapshot);
|
||||
|
||||
storage.writer.deduceTypesOfObjectColumns(storage_snapshot, block);
|
||||
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
|
||||
|
||||
using DelayedPartitions = std::vector<MergeTreeSink::DelayedChunk::Partition>;
|
||||
|
@ -150,7 +150,8 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
|
||||
if (quorum)
|
||||
checkQuorumPrecondition(zookeeper);
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
auto storage_snapshot = storage.getStorageSnapshot(metadata_snapshot);
|
||||
storage.writer.deduceTypesOfObjectColumns(storage_snapshot, block);
|
||||
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
|
||||
|
||||
using DelayedPartitions = std::vector<ReplicatedMergeTreeSink::DelayedChunk::Partition>;
|
||||
@ -158,6 +159,7 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
|
||||
|
||||
size_t streams = 0;
|
||||
bool support_parallel_write = false;
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
for (auto & current_block : part_blocks)
|
||||
{
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user