ClickHouse/src/Interpreters/PreparedSets.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

129 lines
4.4 KiB
C++
Raw Normal View History

2019-01-18 16:30:35 +00:00
#pragma once
#include <Parsers/IAST.h>
#include <DataTypes/IDataType.h>
#include <future>
2019-01-18 16:30:35 +00:00
#include <functional>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>
#include <DataTypes/DataTypeLowCardinality.h>
2022-07-18 15:53:30 +00:00
#include <Storages/IStorage_fwd.h>
#include <QueryPipeline/SizeLimits.h>
#include <Processors/QueryPlan/QueryPlan.h>
2020-10-26 19:12:40 +00:00
2019-01-18 16:30:35 +00:00
namespace DB
{
2022-07-18 15:53:30 +00:00
/// Forward declarations for types that the declarations below use
/// by reference or through smart pointers.
/// NOTE(review): Processors/QueryPlan/QueryPlan.h is already included above, so the
/// forward declaration of QueryPlan looks redundant - confirm before removing either.
class QueryPlan;
class Set;
/// Shared-ownership handle to a Set.
using SetPtr = std::shared_ptr<Set>;
class InterpreterSelectWithUnionQuery;
2022-07-18 15:53:30 +00:00
/// Information on how to build a set for the [GLOBAL] IN section.
/// Groups together the set to be filled, an optional table that receives the result,
/// and the query plan (`source`) the data is obtained from.
class SubqueryForSet
2022-07-18 15:53:30 +00:00
{
public:
2022-07-18 15:53:30 +00:00
/// Creates the source for this set using the given interpreter.
/// `table_` is optional and defaults to nullptr.
/// NOTE(review): defined out of line - presumably fills `source` (and `table` when given); confirm in the .cpp.
void createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_ = nullptr);
/// NOTE(review): presumably reports whether `source` currently holds a plan - confirm in the .cpp.
bool hasSource() const;
/// Returns the query plan for the set's source
/// and removes it from SubqueryForSet because we need to build it only once.
std::unique_ptr<QueryPlan> detachSource();
2022-07-18 15:53:30 +00:00
/// Build this set from the result of the subquery.
SetPtr set;
2022-07-18 15:53:30 +00:00
/// If set, put the result into the table.
/// This is a temporary table for transferring to remote servers for distributed query processing.
StoragePtr table;
/// The source is obtained using the InterpreterSelectQuery subquery.
/// Owned exclusively by this object until it is handed out by detachSource().
std::unique_ptr<QueryPlan> source;
2022-07-18 15:53:30 +00:00
};
/// Key under which a prepared set is stored: the hash of the AST and,
/// for literals, the desired data types of the set elements.
struct PreparedSetKey
2019-01-18 16:30:35 +00:00
{
/// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired
/// data types of set elements (two different Sets can be required for two tuples with the same contents
/// if left hand sides of the IN operators have different types).
2022-07-18 15:53:30 +00:00
static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_);
/// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting
/// set is fully determined by the subquery.
2022-07-18 15:53:30 +00:00
static PreparedSetKey forSubquery(const IAST & ast);
/// Hash of the AST this key was created from.
IAST::Hash ast_hash;
DataTypes types; /// Empty for subqueries.
2022-07-18 15:53:30 +00:00
/// Equality comparison, defined out of line. It is the key-equality predicate
/// of the unordered_map containers keyed by PreparedSetKey.
bool operator==(const PreparedSetKey & other) const;
/// Hash functor for unordered_map containers keyed by PreparedSetKey.
/// Only the first component of `ast_hash` is used; `types` does not take part in the hash,
/// so keys that differ only in `types` get the same hash value.
/// NOTE(review): such keys are presumably told apart by operator== - confirm in the .cpp.
struct Hash
{
UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; }
};
2019-01-18 16:30:35 +00:00
};
2022-07-18 15:53:30 +00:00
/// Container of prepared sets, indexed by PreparedSetKey, together with
/// the information required for building sets from subqueries.
class PreparedSets
{
public:
/// Subquery descriptions indexed by a String key.
/// NOTE(review): the key is presumably the `subquery_id` passed to the methods below - confirm in the .cpp.
using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>;
/// Returns a reference to the SubqueryForSet for `subquery_id`.
/// NOTE(review): judging by the name, an entry is created when it does not exist yet, using `key`,
/// `set_size_limit` and `transform_null_in` - defined out of line, confirm in the .cpp.
SubqueryForSet & createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key,
SizeLimits set_size_limit, bool transform_null_in);
/// Returns a reference to the SubqueryForSet for `subquery_id`.
/// NOTE(review): behaviour for an unknown id is not visible here - confirm in the .cpp.
SubqueryForSet & getSubquery(const String & subquery_id);
2022-07-18 15:53:30 +00:00
/// Registers `set_` under `key`.
/// NOTE(review): behaviour when `key` is already present is not visible here - confirm in the .cpp.
void set(const PreparedSetKey & key, SetPtr set_);
/// Returns a mutable reference to the SetPtr associated with `key`.
/// NOTE(review): behaviour for a missing key is not visible here - confirm in the .cpp.
SetPtr & get(const PreparedSetKey & key);
2022-07-18 15:53:30 +00:00
/// Get subqueries and clear them.
/// We need to build a plan for subqueries just once. That's why we can clear them after accessing them.
/// SetPtr would still be available for consumers of PreparedSets.
SubqueriesForSets detachSubqueries();
2022-07-18 15:53:30 +00:00
/// Returns all sets that match the given AST hash without checking types.
2022-07-18 15:53:30 +00:00
/// Used in KeyCondition and MergeTreeIndexConditionBloomFilter to make a non-exact match for types in PreparedSetKey.
std::vector<SetPtr> getByTreeHash(IAST::Hash ast_hash);
2022-07-18 15:53:30 +00:00
/// NOTE(review): presumably true when no sets are stored - defined out of line, confirm in the .cpp.
bool empty() const;
private:
/// Prepared sets indexed by key; hashing is done by PreparedSetKey::Hash.
std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash> sets;
/// This is the information required for building sets
SubqueriesForSets subqueries;
};
2019-01-18 16:30:35 +00:00
2022-07-18 15:53:30 +00:00
/// Shared-ownership handle to PreparedSets.
using PreparedSetsPtr = std::shared_ptr<PreparedSets>;
2019-01-18 16:30:35 +00:00
/// This set cache is used to avoid building the same set multiple times. It differs from PreparedSets in that
/// it can be used across multiple queries. One use case is when we execute the same mutation on multiple parts. In this
/// case each part is processed by a separate mutation task but they can share the same set.
class PreparedSetsCache
{
public:
/// Returns the set from the cache or builds it using the provided function.
/// If the set is already being built by another task, then this call will wait for the set to be built.
SetPtr findOrBuild(const PreparedSetKey & key, const std::function<SetPtr()> & build_set);
private:
/// State of one cached set: the producer side (`promise`) and the consumer side (`filled_set`).
struct Entry
{
std::promise<SetPtr> promise; /// The promise is set when the set is built by the first task.
std::shared_future<SetPtr> filled_set; /// Other tasks can wait for the set to be built.
};
/// Entries are held through shared_ptr so that a task can keep using an entry after releasing the lock.
using EntryPtr = std::shared_ptr<Entry>;
/// Protects just updates to the cache. Once we have got an EntryPtr from the cache we can access it without locking.
std::mutex cache_mutex;
/// Cached entries indexed by key; hashing is done by PreparedSetKey::Hash.
std::unordered_map<PreparedSetKey, EntryPtr, PreparedSetKey::Hash> cache;
};
/// Shared-ownership handle to PreparedSetsCache.
using PreparedSetsCachePtr = std::shared_ptr<PreparedSetsCache>;
2019-01-18 16:30:35 +00:00
}