ClickHouse/src/Interpreters/Set.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

683 lines
22 KiB
C++
Raw Normal View History

2018-06-30 21:35:01 +00:00
#include <optional>
#include <Core/Field.h>
2012-08-23 20:22:44 +00:00
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
2012-08-23 20:22:44 +00:00
2017-07-13 20:58:19 +00:00
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
2012-08-23 23:49:28 +00:00
#include <Interpreters/Set.h>
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/NullableUtils.h>
2018-06-30 21:35:01 +00:00
#include <Interpreters/sortBlock.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
2012-08-23 20:22:44 +00:00
#include <Processors/Chunk.h>
#include <Storages/MergeTree/KeyCondition.h>
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov 
<milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
2021-10-02 07:13:14 +00:00
#include <base/range.h>
2022-01-30 19:49:48 +00:00
#include <base/sort.h>
#include <DataTypes/DataTypeLowCardinality.h>
2018-06-30 21:35:01 +00:00
2022-01-30 19:49:48 +00:00
2012-08-23 20:22:44 +00:00
namespace DB
{
2016-01-12 02:21:15 +00:00
/// Error codes used by the exceptions thrown in this file; they are only declared here.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
    extern const int SET_SIZE_LIMIT_EXCEEDED;
    extern const int TYPE_MISMATCH;
}
/// Runtime-to-compile-time dispatcher for insertion.
/// Selects the insertFromBlockImplCase instantiation whose `has_null_map` flag mirrors
/// whether `null_map` is set and whose `build_filter` flag mirrors whether `out_filter` is set.
/// All arguments are forwarded unchanged.
template <typename Method>
void NO_INLINE Set::insertFromBlockImpl(
    Method & method,
    const ColumnRawPtrs & key_columns,
    size_t rows,
    SetVariants & variants,
    ConstNullMapPtr null_map,
    ColumnUInt8::Container * out_filter)
{
    if (null_map && out_filter)
        insertFromBlockImplCase<Method, true, true>(method, key_columns, rows, variants, null_map, out_filter);
    else if (null_map)
        insertFromBlockImplCase<Method, true, false>(method, key_columns, rows, variants, null_map, out_filter);
    else if (out_filter)
        insertFromBlockImplCase<Method, false, true>(method, key_columns, rows, variants, null_map, out_filter);
    else
        insertFromBlockImplCase<Method, false, false>(method, key_columns, rows, variants, null_map, out_filter);
}
2018-06-30 21:35:01 +00:00
/// Inserts the keys of `rows` rows into `method.data`.
///  - has_null_map: rows whose entry in `*null_map` is non-zero are not inserted
///    (and, when a filter is built, get `false` in it).
///  - build_filter: `(*out_filter)[row]` receives `emplace_result.isInserted()` for every inserted-attempt row.
/// `null_map` / `out_filter` are only dereferenced when the corresponding template flag is set.
template <typename Method, bool has_null_map, bool build_filter>
void NO_INLINE Set::insertFromBlockImplCase(
    Method & method,
    const ColumnRawPtrs & key_columns,
    size_t rows,
    SetVariants & variants,
    [[maybe_unused]] ConstNullMapPtr null_map,
    [[maybe_unused]] ColumnUInt8::Container * out_filter)
{
    typename Method::State state(key_columns, key_sizes, nullptr);

    /// Walk over every row of the block.
    for (size_t row = 0; row < rows; ++row)
    {
        if constexpr (has_null_map)
        {
            if ((*null_map)[row])
            {
                /// A key with a NULL component is never put into the set.
                if constexpr (build_filter)
                    (*out_filter)[row] = false;

                continue;
            }
        }

        [[maybe_unused]] auto emplace_result = state.emplaceKey(method.data, row, variants.string_pool);

        if constexpr (build_filter)
            (*out_filter)[row] = emplace_result.isInserted();
    }
}
2023-06-20 14:53:13 +00:00
/// Returns `types` adjusted the same way setHeader() adjusts the stored element types:
/// every LowCardinality type is replaced by its dictionary type and,
/// unless `transform_null_in` is set, Nullable is removed from every type.
DataTypes Set::getElementTypes(DataTypes types, bool transform_null_in)
{
    for (size_t pos = 0; pos < types.size(); ++pos)
    {
        DataTypePtr & current = types[pos];

        const auto * low_cardinality = typeid_cast<const DataTypeLowCardinality *>(current.get());
        if (low_cardinality)
            current = low_cardinality->getDictionaryType();

        if (!transform_null_in)
            current = removeNullable(current);
    }

    return types;
}
void Set::setHeader(const ColumnsWithTypeAndName & header)
2012-08-23 20:22:44 +00:00
{
std::lock_guard lock(rwlock);
2020-12-29 18:16:24 +00:00
if (!data.empty())
2018-04-19 21:34:04 +00:00
return;
keys_size = header.size();
ColumnRawPtrs key_columns;
key_columns.reserve(keys_size);
2018-04-19 21:34:04 +00:00
data_types.reserve(keys_size);
2019-10-31 16:14:06 +00:00
set_elements_types.reserve(keys_size);
2018-04-19 21:34:04 +00:00
/// The constant columns to the right of IN are not supported directly. For this, they first materialize.
Columns materialized_columns;
/// Remember the columns we will work with
for (size_t i = 0; i < keys_size; ++i)
2016-07-10 18:53:27 +00:00
{
materialized_columns.emplace_back(header.at(i).column->convertToFullColumnIfConst());
key_columns.emplace_back(materialized_columns.back().get());
data_types.emplace_back(header.at(i).type);
set_elements_types.emplace_back(header.at(i).type);
2018-04-19 21:34:04 +00:00
/// Convert low cardinality column to full.
2020-04-22 06:01:33 +00:00
if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_types.back().get()))
{
data_types.back() = low_cardinality_type->getDictionaryType();
set_elements_types.back() = low_cardinality_type->getDictionaryType();
materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality());
key_columns.back() = materialized_columns.back().get();
}
2016-07-10 18:53:27 +00:00
}
2018-04-19 21:34:04 +00:00
/// We will insert to the Set only keys, where all components are not NULL.
ConstNullMapPtr null_map{};
ColumnPtr null_map_holder;
if (!transform_null_in)
2021-01-31 18:49:27 +00:00
{
/// We convert nullable columns to non nullable we also need to update nullable types
for (size_t i = 0; i < set_elements_types.size(); ++i)
{
data_types[i] = removeNullable(data_types[i]);
set_elements_types[i] = removeNullable(set_elements_types[i]);
}
extractNestedColumnsAndNullMap(key_columns, null_map);
2021-01-31 18:49:27 +00:00
}
2018-04-19 21:34:04 +00:00
/// Choose data structure to use for the set.
data.init(data.chooseMethod(key_columns, key_sizes));
}
2023-06-22 14:23:04 +00:00
void Set::fillSetElements()
2023-05-25 13:33:52 +00:00
{
2023-06-22 14:23:04 +00:00
fill_set_elements = true;
2023-05-25 13:33:52 +00:00
set_elements.reserve(keys_size);
for (const auto & type : set_elements_types)
set_elements.emplace_back(type->createColumn());
}
bool Set::insertFromBlock(const ColumnsWithTypeAndName & columns)
{
Columns cols;
cols.reserve(columns.size());
for (const auto & column : columns)
cols.emplace_back(column.column);
2023-05-25 13:33:52 +00:00
return insertFromColumns(cols);
}
2023-05-25 13:33:52 +00:00
bool Set::insertFromColumns(const Columns & columns)
{
size_t rows = columns.at(0)->size();
SetKeyColumns holder;
/// Filter to extract distinct values from the block.
if (fill_set_elements)
holder.filter = ColumnUInt8::create(rows);
bool inserted = insertFromColumns(columns, holder);
if (inserted && fill_set_elements)
2023-06-01 21:15:15 +00:00
{
if (max_elements_to_fill && max_elements_to_fill < data.getTotalRowCount())
{
/// Drop filled elementes
fill_set_elements = false;
set_elements.clear();
}
else
appendSetElements(holder);
}
2023-05-25 13:33:52 +00:00
return inserted;
}
bool Set::insertFromColumns(const Columns & columns, SetKeyColumns & holder)
2018-04-19 21:34:04 +00:00
{
std::lock_guard lock(rwlock);
2018-04-19 21:34:04 +00:00
2020-12-29 18:16:24 +00:00
if (data.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Method Set::setHeader must be called before Set::insertFromBlock");
2018-04-19 21:34:04 +00:00
2023-05-25 13:33:52 +00:00
holder.key_columns.reserve(keys_size);
2023-05-26 19:25:33 +00:00
holder.materialized_columns.reserve(keys_size);
2017-04-02 17:37:49 +00:00
/// Remember the columns we will work with
for (size_t i = 0; i < keys_size; ++i)
{
2023-05-26 19:25:33 +00:00
holder.materialized_columns.emplace_back(columns.at(i)->convertToFullIfNeeded());
holder.key_columns.emplace_back(holder.materialized_columns.back().get());
}
size_t rows = columns.at(0)->size();
/// We will insert to the Set only keys, where all components are not NULL.
ConstNullMapPtr null_map{};
ColumnPtr null_map_holder;
if (!transform_null_in)
2023-05-26 19:25:33 +00:00
null_map_holder = extractNestedColumnsAndNullMap(holder.key_columns, null_map);
switch (data.type)
{
case SetVariants::Type::EMPTY:
break;
#define M(NAME) \
case SetVariants::Type::NAME: \
2023-05-26 19:25:33 +00:00
insertFromBlockImpl(*data.NAME, holder.key_columns, rows, data, null_map, holder.filter ? &holder.filter->getData() : nullptr); \
break;
APPLY_FOR_SET_VARIANTS(M)
#undef M
}
2012-08-23 23:49:28 +00:00
2023-05-25 13:33:52 +00:00
return limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
}
/// Appends to `set_elements` the rows of `holder.key_columns` selected by `holder.filter`.
/// Throws LOGICAL_ERROR if either `holder.key_columns` or `set_elements` does not have `keys_size` entries.
void Set::appendSetElements(SetKeyColumns & holder)
{
    const bool sizes_match = holder.key_columns.size() == keys_size && set_elements.size() == keys_size;
    if (!sizes_match)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of key columns for set. Expected {} got {} and {}",
            keys_size, holder.key_columns.size(), set_elements.size());

    size_t rows = holder.key_columns.at(0)->size();

    for (size_t pos = 0; pos < keys_size; ++pos)
    {
        auto filtered_column = holder.key_columns[pos]->filter(holder.filter->getData(), rows);

        /// An empty destination is simply replaced; otherwise the filtered rows are appended to it.
        if (!set_elements[pos]->empty())
            set_elements[pos]->insertRangeFrom(*filtered_column, 0, filtered_column->size());
        else
            set_elements[pos] = filtered_column;

        if (transform_null_in && holder.null_map_holder)
            set_elements[pos]->insert(Null{});
    }
}
void Set::checkIsCreated() const
2023-04-04 10:01:01 +00:00
{
if (!is_created.load())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Trying to use set before it has been built.");
2023-04-04 10:01:01 +00:00
}
2018-04-19 21:34:04 +00:00
/// Evaluates IN (or NOT IN when `negative` is set) for every row of `columns`.
/// Returns a UInt8 column with one value per row of the first input column.
/// Throws LOGICAL_ERROR when no columns are passed; checkColumnsNumber() rejects a wrong number of columns.
/// The set is read under a shared lock on `rwlock`.
ColumnPtr Set::execute(const ColumnsWithTypeAndName & columns, bool negative) const
{
    size_t num_key_columns = columns.size();

    if (0 == num_key_columns)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no columns passed to Set::execute method.");

    auto res = ColumnUInt8::create();
    ColumnUInt8::Container & vec_res = res->getData();
    vec_res.resize(columns.at(0).column->size());

    /// No rows — nothing to compute.
    if (vec_res.empty())
        return res;

    std::shared_lock lock(rwlock);

    /// The set has no header types, i.e. it is empty: IN is false everywhere, NOT IN is true everywhere.
    if (data_types.empty())
    {
        memset(vec_res.data(), negative ? 1 : 0, vec_res.size());
        return res;
    }

    checkColumnsNumber(num_key_columns);

    /// Columns to look up; each one is cast to the corresponding type of the set.
    ColumnRawPtrs key_columns;
    key_columns.reserve(num_key_columns);

    /// Constant columns on the left of IN are not handled directly, so they are materialized first.
    /// This vector owns the cast columns that `key_columns` points at.
    Columns materialized_columns;
    materialized_columns.reserve(num_key_columns);

    for (size_t pos = 0; pos < num_key_columns; ++pos)
    {
        const auto & source = columns.at(pos);
        ColumnWithTypeAndName column_to_cast
            = {source.column->convertToFullColumnIfConst(), source.type, source.name};

        const bool cast_or_null = !transform_null_in && data_types[pos]->canBeInsideNullable();

        ColumnPtr result = cast_or_null
            ? castColumnAccurateOrNull(column_to_cast, data_types[pos])
            : castColumnAccurate(column_to_cast, data_types[pos]);

        materialized_columns.emplace_back(result);
        key_columns.emplace_back(materialized_columns.back().get());
    }

    /// Keys containing a NULL component are not looked up in the set at all.
    ConstNullMapPtr null_map{};
    ColumnPtr null_map_holder;
    if (!transform_null_in)
        null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);

    executeOrdinary(key_columns, vec_res, negative, null_map);

    return res;
}
2012-08-23 22:40:51 +00:00
bool Set::empty() const
{
std::shared_lock lock(rwlock);
return data.empty();
}
size_t Set::getTotalRowCount() const
{
std::shared_lock lock(rwlock);
return data.getTotalRowCount();
}
size_t Set::getTotalByteCount() const
{
std::shared_lock lock(rwlock);
return data.getTotalByteCount();
}
/// Runtime-to-compile-time dispatcher for lookup: picks the executeImplCase instantiation
/// whose `has_null_map` flag mirrors whether `null_map` is set.
template <typename Method>
void NO_INLINE Set::executeImpl(
    Method & method,
    const ColumnRawPtrs & key_columns,
    ColumnUInt8::Container & vec_res,
    bool negative,
    size_t rows,
    ConstNullMapPtr null_map) const
{
    if (!null_map)
        executeImplCase<Method, false>(method, key_columns, vec_res, negative, rows, null_map);
    else
        executeImplCase<Method, true>(method, key_columns, vec_res, negative, rows, null_map);
}
/// Looks up the key of every row in `method.data` and writes the answer into `vec_res`:
///  - a row flagged in `*null_map` (only consulted when has_null_map) gets `negative`;
///  - any other row gets `negative ^ found`.
template <typename Method, bool has_null_map>
void NO_INLINE Set::executeImplCase(
    Method & method,
    const ColumnRawPtrs & key_columns,
    ColumnUInt8::Container & vec_res,
    bool negative,
    size_t rows,
    ConstNullMapPtr null_map) const
{
    Arena pool;
    typename Method::State state(key_columns, key_sizes, nullptr);

    /// NOTE Optimization is not used for consecutive identical strings.

    /// Walk over every row.
    for (size_t row = 0; row < rows; ++row)
    {
        if (has_null_map && (*null_map)[row])
        {
            vec_res[row] = negative;
            continue;
        }

        auto find_result = state.findKey(method.data, row, pool);
        vec_res[row] = negative ^ find_result.isFound();
    }
}
/// Runs the lookup for all rows of `key_columns` with the hash-table variant selected by `data.type`.
/// Nothing is written to `vec_res` when the variant is EMPTY.
void Set::executeOrdinary(
    const ColumnRawPtrs & key_columns,
    ColumnUInt8::Container & vec_res,
    bool negative,
    ConstNullMapPtr null_map) const
{
    /// The row count is taken from the first key column.
    size_t rows = key_columns[0]->size();

    switch (data.type)
    {
        case SetVariants::Type::EMPTY:
            break;
#define M(NAME) \
        case SetVariants::Type::NAME: \
            executeImpl(*data.NAME, key_columns, vec_res, negative, rows, null_map); \
            break;
        APPLY_FOR_SET_VARIANTS(M)
#undef M
    }
}
2019-08-19 16:51:50 +00:00
void Set::checkColumnsNumber(size_t num_key_columns) const
{
if (data_types.size() != num_key_columns)
{
2020-11-10 18:22:26 +00:00
throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH,
"Number of columns in section IN doesn't match. {} at left, {} at right.",
num_key_columns, data_types.size());
2019-08-19 16:51:50 +00:00
}
}
bool Set::areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const
{
/// Out-of-bound access can happen when same set expression built with different columns.
/// Caller may call this method to make sure that the set is indeed the one they want
/// without awaring data_types.size().
if (set_type_idx >= data_types.size())
return false;
return removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))
->equals(*removeNullable(recursiveRemoveLowCardinality(other_type)));
}
2019-08-19 16:51:50 +00:00
void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const
{
if (!this->areTypesEqual(set_type_idx, other_type))
throw Exception(ErrorCodes::TYPE_MISMATCH, "Types of column {} in section IN don't match: "
"{} on the left, {} on the right", toString(set_type_idx + 1),
other_type->getName(), data_types[set_type_idx]->getName());
2019-08-19 16:51:50 +00:00
}
/// Builds the ordered representation of the set used for index analysis.
///  - `indexes_mapping` is sorted by (key_index, tuple_index) and reduced to one entry per key_index;
///  - `ordered_set` gets, for each remaining mapping, the set column at its tuple_index;
///  - those columns are then sorted together as one block, lexicographically in mapping order.
/// `has_all_keys` records whether the number of set columns equals the number of mappings passed in.
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_)
    : has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_))
{
    auto by_key_then_tuple = [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
    {
        return std::tie(l.key_index, l.tuple_index) < std::tie(r.key_index, r.tuple_index);
    };
    ::sort(indexes_mapping.begin(), indexes_mapping.end(), by_key_then_tuple);

    /// Keep only the first mapping for every key column.
    auto same_key = [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
    {
        return l.key_index == r.key_index;
    };
    auto duplicates_begin = std::unique(indexes_mapping.begin(), indexes_mapping.end(), same_key);
    indexes_mapping.erase(duplicates_begin, indexes_mapping.end());

    size_t tuple_size = indexes_mapping.size();
    ordered_set.resize(tuple_size);

    Block block_to_sort;
    SortDescription sort_description;

    for (size_t pos = 0; pos < tuple_size; ++pos)
    {
        ordered_set[pos] = set_elements[indexes_mapping[pos].tuple_index];

        /// Synthetic column names "_0", "_1", ... are only needed to describe the sort order.
        String column_name = "_" + toString(pos);
        block_to_sort.insert({ordered_set[pos], nullptr, column_name});
        sort_description.emplace_back(column_name, 1, 1);
    }

    sortBlock(block_to_sort, sort_description);

    for (size_t pos = 0; pos < tuple_size; ++pos)
        ordered_set[pos] = block_to_sort.getByPosition(pos).column;
}
2018-02-08 14:15:21 +00:00
/** Return the BoolMask where:
  * 1: the intersection of the set and the range is non-empty
  * 2: the range contains elements not in the set
  *
  * `key_ranges` and `data_types` are indexed by the key_index of each mapping.
  * If the chain of monotonic functions cannot be applied to some key range, {true, true} is returned.
  */
BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types, bool single_point) const
{
    size_t tuple_size = indexes_mapping.size();

    /// The two corners of the hyperrectangle, one component per mapped key column.
    FieldValues left_point;
    FieldValues right_point;
    left_point.reserve(tuple_size);
    right_point.reserve(tuple_size);

    for (size_t pos = 0; pos < tuple_size; ++pos)
    {
        left_point.emplace_back(ordered_set[pos]->cloneEmpty());
        right_point.emplace_back(ordered_set[pos]->cloneEmpty());
    }

    bool left_included = true;
    bool right_included = true;

    for (size_t pos = 0; pos < tuple_size; ++pos)
    {
        const auto & mapping = indexes_mapping[pos];

        std::optional<Range> new_range = KeyCondition::applyMonotonicFunctionsChainToRange(
            key_ranges[mapping.key_index],
            mapping.functions,
            data_types[mapping.key_index],
            single_point);

        if (!new_range)
            return {true, true};

        left_point[pos].update(new_range->left);
        left_included &= new_range->left_included;
        right_point[pos].update(new_range->right);
        right_included &= new_range->right_included;
    }

    /// Three-way comparison of the value at `row` of a set column with one component of a point:
    /// negative if lhs < rhs, zero if equal, positive if lhs > rhs.
    auto compare = [](const IColumn & lhs, const FieldValue & rhs, size_t row)
    {
        if (rhs.isNegativeInfinity())
            return 1;

        if (rhs.isPositiveInfinity())
        {
            /// A NULL in the set column is treated as equal to +Inf; anything else is smaller.
            Field f;
            lhs.get(row, f);
            return f.isNull() ? 0 : -1;
        }

        return lhs.compareAt(row, 0, *rhs.column, 1);
    };

    /// Lexicographic "row of the ordered set < point".
    auto less = [this, &compare, tuple_size](size_t row, const auto & point)
    {
        for (size_t pos = 0; pos < tuple_size; ++pos)
        {
            int order = compare(*ordered_set[pos], point[pos], row);
            if (order)
                return order < 0;
        }
        return false;
    };

    /// "row of the ordered set == point" in every component.
    auto equals = [this, &compare, tuple_size](size_t row, const auto & point)
    {
        for (size_t pos = 0; pos < tuple_size; ++pos)
            if (compare(*ordered_set[pos], point[pos], row) != 0)
                return false;
        return true;
    };

    /** Because each hyperrectangle maps to a contiguous sequence of elements
      * laid out in the lexicographically increasing order, the set intersects the range
      * if and only if either bound coincides with an element or at least one element
      * lies between the two lower bounds computed below.
      */
    auto indices = collections::range(0, size());
    auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, less);
    auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, less);

    /// A special case of 1-element KeyRange. It's useful for partition pruning.
    /// The range is a single element when every component has equal bounds
    /// (either equal normal values or the same infinity on both sides).
    bool one_element_range = true;
    for (size_t pos = 0; pos < tuple_size && one_element_range; ++pos)
    {
        auto & left = left_point[pos];
        auto & right = right_point[pos];

        if (left.isNormal() && right.isNormal())
            one_element_range = (0 == left.column->compareAt(0, 0, *right.column, 1));
        else
            one_element_range = (left.isPositiveInfinity() && right.isPositiveInfinity())
                || (left.isNegativeInfinity() && right.isNegativeInfinity());
    }

    if (one_element_range && has_all_keys)
    {
        /// Exactly one element is in the range, so the condition is either
        /// always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0) on it.

        /// An excluded bound makes the single-point range empty.
        if (!left_included || !right_included)
            return {false, true};

        if (left_lower != indices.end() && equals(*left_lower, left_point))
            return {true, false};

        return {false, true};
    }

    /// With more than one element in the range the condition can always be false,
    /// so only "can be true" remains to be determined.
    /// left_lower is the first set element >= left_point, right_lower the first one >= right_point.
    if (left_lower + 1 < right_lower)
    {
        /// The element at left_lower + 1 lies strictly inside the range.
        return {true, true};
    }

    if (left_lower + 1 == right_lower)
    {
        /// left_point <= *left_lower < right_point <= *right_lower, and left_lower is a valid position.
        /// *left_lower matches unless it sits exactly on an excluded left bound.
        if (left_included || !equals(*left_lower, left_point))
            return {true, true};

        /// The left bound did not produce a match; check for a match at the right boundary.
        return {right_included && right_lower != indices.end() && equals(*right_lower, right_point), true};
    }

    /// left_lower == right_lower: no set element lies before right_point within the range,
    /// so the only possible match is an element equal to an included right bound.
    return {right_included && right_lower != indices.end() && equals(*right_lower, right_point), true};
}
/// Returns true if at least one mapping has a non-empty `functions` chain.
bool MergeTreeSetIndex::hasMonotonicFunctionsChain() const
{
    return std::any_of(
        indexes_mapping.begin(),
        indexes_mapping.end(),
        [](const auto & mapping) { return !mapping.functions.empty(); });
}
/// Stores `x` in this value.
/// An infinity is kept in `value`; any other field replaces the single element held in `column`
/// and `value` is reset to a default-constructed Field.
void FieldValue::update(const Field & x)
{
    const bool is_infinity = x.isNegativeInfinity() || x.isPositiveInfinity();
    if (is_infinity)
    {
        value = x;
        return;
    }

    /// The column never holds more than one element: drop the previous one first.
    if (!column->empty())
        column->popBack(1);

    column->insert(x);
    value = Field(); // Set back to normal value.
}
2012-08-23 20:22:44 +00:00
}