ClickHouse/src/Interpreters/Set.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

252 lines
8.5 KiB
C++
Raw Normal View History

2012-08-22 20:29:01 +00:00
#pragma once
#include <Core/Block.h>
2021-10-15 20:18:20 +00:00
#include <QueryPipeline/SizeLimits.h>
#include <DataTypes/IDataType.h>
#include <Interpreters/SetVariants.h>
2023-06-22 14:23:04 +00:00
#include <Interpreters/SetKeys.h>
#include <Parsers/IAST.h>
#include <Storages/MergeTree/BoolMask.h>
2012-08-22 20:29:01 +00:00
2023-01-12 15:51:04 +00:00
#include <Common/SharedMutex.h>
#include <Interpreters/castColumn.h>
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
namespace DB
{
2017-01-14 09:11:11 +00:00
struct Range;
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
class Context;
2018-02-08 17:46:22 +00:00
class IFunctionBase;
2022-11-30 18:48:09 +00:00
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
class Chunk;
2018-06-30 21:35:01 +00:00
2017-03-03 21:15:46 +00:00
/** Data structure for implementation of IN expression.
2012-08-22 20:29:01 +00:00
*/
2012-08-23 22:40:51 +00:00
class Set
2012-08-22 20:29:01 +00:00
{
2012-08-23 22:40:51 +00:00
public:
2018-07-02 18:57:14 +00:00
/// 'fill_set_elements': in addition to hash table
/// (that is useful only for checking that some value is in the set and may not store the original values),
/// store all set elements in explicit form.
/// This is needed for subsequent use for index.
2023-06-22 14:23:04 +00:00
Set(const SizeLimits & limits_, size_t max_elements_to_fill_, bool transform_null_in_)
2024-01-23 17:04:50 +00:00
: log(getLogger("Set")),
limits(limits_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_),
cast_cache(std::make_unique<InternalCastFunctionCache>())
{}
2018-04-19 21:34:04 +00:00
/** Set can be created either from AST or from a stream of data (subquery result).
*/
/** Create a Set from stream.
* Call setHeader, then call insertFromBlock for each block.
*/
void setHeader(const ColumnsWithTypeAndName & header);
2018-04-19 21:34:04 +00:00
/// Returns false, if some limit was exceeded and no need to insert more data.
2023-05-25 13:33:52 +00:00
bool insertFromColumns(const Columns & columns);
bool insertFromBlock(const ColumnsWithTypeAndName & columns);
2023-06-22 14:23:04 +00:00
void fillSetElements();
2023-05-25 13:33:52 +00:00
bool insertFromColumns(const Columns & columns, SetKeyColumns & holder);
void appendSetElements(SetKeyColumns & holder);
2019-11-01 10:58:29 +00:00
/// Call after all blocks were inserted. To get the information that set is already created.
2023-04-11 21:19:44 +00:00
void finishInsert() { is_created = true; }
2019-11-01 10:58:29 +00:00
/// finishInsert and isCreated are thread-safe
bool isCreated() const { return is_created.load(); }
void checkIsCreated() const;
2023-04-04 10:01:01 +00:00
2017-03-03 21:15:46 +00:00
/** For columns of 'block', check belonging of corresponding rows to the set.
* Return UInt8 column with the result.
2012-08-23 20:22:44 +00:00
*/
ColumnPtr execute(const ColumnsWithTypeAndName & columns, bool negative) const;
2014-03-26 10:56:21 +00:00
bool hasNull() const;
bool empty() const;
size_t getTotalRowCount() const;
size_t getTotalByteCount() const;
const DataTypes & getDataTypes() const { return data_types; }
2019-10-31 16:14:06 +00:00
const DataTypes & getElementsTypes() const { return set_elements_types; }
2023-05-25 13:33:52 +00:00
bool hasExplicitSetElements() const { return fill_set_elements || (!set_elements.empty() && set_elements.front()->size() == data.getTotalRowCount()); }
2023-12-06 12:53:14 +00:00
bool hasSetElements() const { return !set_elements.empty(); }
Columns getSetElements() const { checkIsCreated(); return { set_elements.begin(), set_elements.end() }; }
2019-08-19 16:51:50 +00:00
void checkColumnsNumber(size_t num_key_columns) const;
bool areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const;
2019-08-19 16:51:50 +00:00
void checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const;
2023-06-20 14:53:13 +00:00
static DataTypes getElementTypes(DataTypes types, bool transform_null_in);
2023-06-01 21:15:15 +00:00
2012-08-23 20:22:44 +00:00
private:
size_t keys_size = 0;
Sizes key_sizes;
SetVariants data;
/** How IN works with Nullable types.
*
* For simplicity reasons, all NULL values and any tuples with at least one NULL element are ignored in the Set.
* And for left hand side values, that are NULLs or contain any NULLs, we return 0 (means that element is not in Set).
*
* If we want more standard compliant behaviour, we must return NULL
* if lhs is NULL and set is not empty or if lhs is not in set, but set contains at least one NULL.
* It is more complicated with tuples.
* For example,
* (1, NULL, 2) IN ((1, NULL, 3)) must return 0,
* but (1, NULL, 2) IN ((1, 1111, 2)) must return NULL.
*
* We have not implemented such sophisticated behaviour.
*/
2017-06-02 21:37:28 +00:00
/** The data types from which the set was created.
* When checking for belonging to a set, the types of columns to be checked must match with them.
2012-08-23 22:40:51 +00:00
*/
DataTypes data_types;
2019-10-31 16:14:06 +00:00
/// Types for set_elements.
DataTypes set_elements_types;
2024-01-23 17:04:50 +00:00
LoggerPtr log;
2017-06-02 21:37:28 +00:00
/// Limitations on the maximum size of the set
SizeLimits limits;
2018-07-02 18:57:14 +00:00
/// Do we need to additionally store all elements of the set in explicit form for subsequent use for index.
2023-06-22 14:23:04 +00:00
bool fill_set_elements = false;
2023-06-01 21:15:15 +00:00
size_t max_elements_to_fill;
2018-07-02 18:57:14 +00:00
/// If true, insert NULL values to set.
bool transform_null_in;
2020-04-06 13:30:16 +00:00
2019-11-01 10:58:29 +00:00
/// Check if set contains all the data.
std::atomic<bool> is_created = false;
2019-11-01 10:58:29 +00:00
2017-06-02 21:37:28 +00:00
/// If in the left part columns contains the same types as the elements of the set.
void executeOrdinary(
const ColumnRawPtrs & key_columns,
ColumnUInt8::Container & vec_res,
bool negative,
const PaddedPODArray<UInt8> * null_map) const;
2018-06-30 21:35:01 +00:00
/// Collected elements of `Set`.
2017-06-02 21:37:28 +00:00
/// It is necessary for the index to work on the primary key in the IN statement.
std::vector<IColumn::WrappedPtr> set_elements;
2017-06-02 21:37:28 +00:00
/** Protects work with the set in the functions `insertFromBlock` and `execute`.
* These functions can be called simultaneously from different threads only when using StorageSet,
2015-01-27 00:52:03 +00:00
*/
2023-01-12 15:51:04 +00:00
mutable SharedMutex rwlock;
/// A cache for cast functions (if any) to avoid rebuilding cast functions
/// for every call to `execute`
mutable std::unique_ptr<InternalCastFunctionCache> cast_cache;
template <typename Method>
void insertFromBlockImpl(
Method & method,
const ColumnRawPtrs & key_columns,
size_t rows,
SetVariants & variants,
2018-06-30 21:35:01 +00:00
ConstNullMapPtr null_map,
ColumnUInt8::Container * out_filter);
2018-06-30 21:35:01 +00:00
template <typename Method, bool has_null_map, bool build_filter>
void insertFromBlockImplCase(
Method & method,
const ColumnRawPtrs & key_columns,
size_t rows,
SetVariants & variants,
2018-06-30 21:35:01 +00:00
ConstNullMapPtr null_map,
ColumnUInt8::Container * out_filter);
template <typename Method>
void executeImpl(
Method & method,
const ColumnRawPtrs & key_columns,
ColumnUInt8::Container & vec_res,
bool negative,
size_t rows,
ConstNullMapPtr null_map) const;
template <typename Method, bool has_null_map>
void executeImplCase(
Method & method,
const ColumnRawPtrs & key_columns,
ColumnUInt8::Container & vec_res,
bool negative,
size_t rows,
ConstNullMapPtr null_map) const;
2012-08-22 20:29:01 +00:00
};
using SetPtr = std::shared_ptr<Set>;
using ConstSetPtr = std::shared_ptr<const Set>;
using Sets = std::vector<SetPtr>;
2012-08-22 20:29:01 +00:00
2018-06-30 21:35:01 +00:00
class IFunction;
using FunctionPtr = std::shared_ptr<IFunction>;
/** Class that represents single value with possible infinities.
* Single field is stored in column for more optimal inplace comparisons with other regular columns.
* Extracting fields from columns and further their comparison is suboptimal and requires extra copying.
*/
struct FieldValue
{
explicit FieldValue(MutableColumnPtr && column_) : column(std::move(column_)) {}
void update(const Field & x);
bool isNormal() const { return !value.isPositiveInfinity() && !value.isNegativeInfinity(); }
bool isPositiveInfinity() const { return value.isPositiveInfinity(); }
bool isNegativeInfinity() const { return value.isNegativeInfinity(); }
Field value; // Null, -Inf, +Inf
// If value is Null, uses the actual value in column
MutableColumnPtr column;
};
/// Class for checkInRange function.
2018-02-08 15:31:37 +00:00
class MergeTreeSetIndex
{
public:
2018-02-08 14:15:21 +00:00
/** Mapping for tuple positions from Set::set_elements to
2018-07-02 18:57:14 +00:00
* position of pk index and functions chain applied to this column.
2018-02-08 14:15:21 +00:00
*/
struct KeyTuplePositionMapping
2018-02-08 15:31:37 +00:00
{
size_t tuple_index;
size_t key_index;
2018-02-08 17:46:22 +00:00
std::vector<FunctionBasePtr> functions;
};
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
2018-06-30 21:35:01 +00:00
size_t size() const { return ordered_set.at(0)->size(); }
2018-04-20 01:14:04 +00:00
bool hasMonotonicFunctionsChain() const;
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types, bool single_point = false) const;
2018-04-20 01:14:04 +00:00
const Columns & getOrderedSet() const { return ordered_set; }
private:
// If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element.
bool has_all_keys;
2018-06-30 21:35:01 +00:00
Columns ordered_set;
std::vector<KeyTuplePositionMapping> indexes_mapping;
using FieldValues = std::vector<FieldValue>;
};
2019-03-08 03:23:34 +00:00
}