mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-14 19:45:11 +00:00

Merge branch 'ClickHouse:master' into master
commit 1f2e425d5b

1 .gitignore vendored
@@ -159,6 +159,7 @@ website/package-lock.json
/programs/server/store
/programs/server/uuid
/programs/server/coordination
+/programs/server/workload

# temporary test files
tests/queries/0_stateless/test_*
@@ -3224,6 +3224,34 @@ Default value: "default"

**See Also**
- [Workload Scheduling](/docs/en/operations/workload-scheduling.md)

## workload_path {#workload_path}

The directory used as storage for all `CREATE WORKLOAD` and `CREATE RESOURCE` queries. By default, the `/workload/` folder under the server working directory is used.

**Example**

``` xml
<workload_path>/var/lib/clickhouse/workload/</workload_path>
```

**See Also**
- [Workload Hierarchy](/docs/en/operations/workload-scheduling.md#workloads)
- [workload_zookeeper_path](#workload_zookeeper_path)

## workload_zookeeper_path {#workload_zookeeper_path}

The path to a ZooKeeper node that is used as storage for all `CREATE WORKLOAD` and `CREATE RESOURCE` queries. For consistency, all SQL definitions are stored as the value of this single znode. By default, ZooKeeper is not used and definitions are stored on [disk](#workload_path).

**Example**

``` xml
<workload_zookeeper_path>/clickhouse/workload/definitions.sql</workload_zookeeper_path>
```

**See Also**
- [Workload Hierarchy](/docs/en/operations/workload-scheduling.md#workloads)
- [workload_path](#workload_path)

## max_authentication_methods_per_user {#max_authentication_methods_per_user}

The maximum number of authentication methods a user can be created with or altered to.
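As an illustration of this limit, here is a hedged sketch of creating a user with two authentication methods (the user name and passwords are hypothetical; see the `CREATE USER` docs for the exact multiple-method syntax):

``` sql
-- With max_authentication_methods_per_user = 2 this is the maximum allowed;
-- attempting to add a third method would be rejected by the server.
CREATE USER alice IDENTIFIED WITH plaintext_password BY 'secret1', plaintext_password BY 'secret2';
```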
37 docs/en/operations/system-tables/resources.md Normal file
@@ -0,0 +1,37 @@
---
slug: /en/operations/system-tables/resources
---
# resources

Contains information about [resources](/docs/en/operations/workload-scheduling.md#workload_entity_storage) residing on the local server. The table contains a row for every resource.

Example:

``` sql
SELECT *
FROM system.resources
FORMAT Vertical
```

``` text
Row 1:
──────
name: io_read
read_disks: ['s3']
write_disks: []
create_query: CREATE RESOURCE io_read (READ DISK s3)

Row 2:
──────
name: io_write
read_disks: []
write_disks: ['s3']
create_query: CREATE RESOURCE io_write (WRITE DISK s3)
```

Columns:

- `name` (`String`) - Resource name.
- `read_disks` (`Array(String)`) - The array of disk names that use this resource for read operations.
- `write_disks` (`Array(String)`) - The array of disk names that use this resource for write operations.
- `create_query` (`String`) - The definition of the resource.
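For instance, a quick way to see which resources serve reads (a hypothetical follow-up query built on the columns above):

``` sql
SELECT name, read_disks
FROM system.resources
WHERE notEmpty(read_disks)
```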
40 docs/en/operations/system-tables/workloads.md Normal file
@@ -0,0 +1,40 @@
---
slug: /en/operations/system-tables/workloads
---
# workloads

Contains information about [workloads](/docs/en/operations/workload-scheduling.md#workload_entity_storage) residing on the local server. The table contains a row for every workload.

Example:

``` sql
SELECT *
FROM system.workloads
FORMAT Vertical
```

``` text
Row 1:
──────
name: production
parent: all
create_query: CREATE WORKLOAD production IN `all` SETTINGS weight = 9

Row 2:
──────
name: development
parent: all
create_query: CREATE WORKLOAD development IN `all`

Row 3:
──────
name: all
parent:
create_query: CREATE WORKLOAD `all`
```

Columns:

- `name` (`String`) - Workload name.
- `parent` (`String`) - Parent workload name.
- `create_query` (`String`) - The definition of the workload.
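For example, the children of the root workload can be listed with a query like this (a hypothetical follow-up using the columns above):

``` sql
SELECT name, create_query
FROM system.workloads
WHERE parent = 'all'
```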
@@ -43,6 +43,20 @@ Example:
</clickhouse>
```

An alternative way to express which disks are used by a resource is SQL syntax:

```sql
CREATE RESOURCE resource_name (WRITE DISK disk1, READ DISK disk2)
```

A resource can be used for any number of disks, for READ, for WRITE, or for both READ and WRITE. There is also a syntax for using a resource for all the disks:

```sql
CREATE RESOURCE all_io (READ ANY DISK, WRITE ANY DISK);
```

Note that server configuration options take priority over the SQL way to define resources.

## Workload markup {#workload_markup}

Queries can be marked with the setting `workload` to distinguish different workloads. If `workload` is not set, the value "default" is used. Note that you are able to specify another value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting.
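For illustration, a hedged sketch of pinning `workload` for a user through a settings profile (the profile and user names are hypothetical; constraint syntax as in the settings profiles docs):

```sql
-- Every query from bob is marked as 'production' and cannot be overridden.
CREATE SETTINGS PROFILE pinned_workload SETTINGS workload = 'production' CONST TO bob;
```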
@@ -153,9 +167,48 @@ Example:
</clickhouse>
```

## Workload hierarchy (SQL only) {#workloads}

Defining resources and classifiers in XML can be challenging. ClickHouse provides SQL syntax that is much more convenient. All resources that were created with `CREATE RESOURCE` share the same structure of the hierarchy, but can differ in some aspects. Every workload created with `CREATE WORKLOAD` maintains a few automatically created scheduling nodes for every resource. A child workload can be created inside another parent workload. Here is an example that defines exactly the same hierarchy as the XML configuration above:

```sql
CREATE RESOURCE network_write (WRITE DISK s3)
CREATE RESOURCE network_read (READ DISK s3)
CREATE WORKLOAD all SETTINGS max_requests = 100
CREATE WORKLOAD development IN all
CREATE WORKLOAD production IN all SETTINGS weight = 3
```

The name of a leaf workload without children can be used in query settings: `SETTINGS workload = 'name'`. Note that workload classifiers are also created automatically when using SQL syntax.
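For example (the table is hypothetical; `development` is a leaf workload from the hierarchy above):

```sql
SELECT count()
FROM my_table
SETTINGS workload = 'development'
```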

To customize a workload, the following settings can be used (see the sketch after this list):
* `priority` - sibling workloads are served according to static priority values (a lower value means higher priority).
* `weight` - sibling workloads having the same static priority share resources according to weights.
* `max_requests` - the limit on the number of concurrent resource requests in this workload.
* `max_cost` - the limit on the total in-flight bytes count of concurrent resource requests in this workload.
* `max_speed` - the limit on the byte processing rate of this workload (the limit is independent for every resource).
* `max_burst` - the maximum number of bytes that can be processed by the workload without being throttled (independently for every resource).
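A hedged sketch combining several of these settings (all values are illustrative only):

```sql
CREATE WORKLOAD production IN all SETTINGS priority = 0, weight = 9, max_requests = 200
CREATE WORKLOAD development IN all SETTINGS priority = 1, max_speed = 10000000, max_burst = 50000000
```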

Note that workload settings are translated into a proper set of scheduling nodes. For more details, see the description of the scheduling node [types and options](#hierarchy).

There is no way to specify different hierarchies of workloads for different resources, but there is a way to specify a different workload setting value for a specific resource:

```sql
CREATE OR REPLACE WORKLOAD all SETTINGS max_requests = 100, max_speed = 1000000 FOR network_read, max_speed = 2000000 FOR network_write
```

Also note that a workload or resource cannot be dropped if it is referenced by another workload. To update the definition of a workload, use the `CREATE OR REPLACE WORKLOAD` query.
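Because of this reference rule, tearing down the example hierarchy has to proceed from the leaves to the root (a sketch based on the rule above):

```sql
DROP WORKLOAD production;
DROP WORKLOAD development;
DROP WORKLOAD all; -- allowed only once its children are gone
```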
## Workloads and resources storage {#workload_entity_storage}

Definitions of all workloads and resources in the form of `CREATE WORKLOAD` and `CREATE RESOURCE` queries are stored persistently either on disk at `workload_path` or in ZooKeeper at `workload_zookeeper_path`. ZooKeeper storage is recommended to achieve consistency between nodes. Alternatively, the `ON CLUSTER` clause can be used along with disk storage.
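With disk storage, a hedged sketch of keeping replicas in sync using `ON CLUSTER` (the cluster name is hypothetical; clause placement is assumed to follow other ClickHouse DDL):

```sql
CREATE WORKLOAD all ON CLUSTER my_cluster SETTINGS max_requests = 100
```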
## See also
- [system.scheduler](/docs/en/operations/system-tables/scheduler.md)
- [system.workloads](/docs/en/operations/system-tables/workloads.md)
- [system.resources](/docs/en/operations/system-tables/resources.md)
- [merge_workload](/docs/en/operations/settings/merge-tree-settings.md#merge_workload) merge tree setting
- [merge_workload](/docs/en/operations/server-configuration-parameters/settings.md#merge_workload) global server setting
- [mutation_workload](/docs/en/operations/settings/merge-tree-settings.md#mutation_workload) merge tree setting
- [mutation_workload](/docs/en/operations/server-configuration-parameters/settings.md#mutation_workload) global server setting
- [workload_path](/docs/en/operations/server-configuration-parameters/settings.md#workload_path) global server setting
- [workload_zookeeper_path](/docs/en/operations/server-configuration-parameters/settings.md#workload_zookeeper_path) global server setting
programs/server/Server.cpp

@@ -86,7 +86,7 @@
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <Common/Scheduler/Nodes/registerSchedulerNodes.h>
-#include <Common/Scheduler/Nodes/registerResourceManagers.h>
+#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Common/Config/ConfigReloader.h>
#include <Server/HTTPHandlerFactory.h>
#include "MetricsTransmitter.h"

@@ -920,7 +920,6 @@ try
registerFormats();
registerRemoteFileMetadatas();
registerSchedulerNodes();
-registerResourceManagers();

CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());

@@ -2253,6 +2252,8 @@ try
database_catalog.assertDatabaseExists(default_database);
/// Load user-defined SQL functions.
global_context->getUserDefinedSQLObjectsStorage().loadObjects();
+/// Load WORKLOADs and RESOURCEs.
+global_context->getWorkloadEntityStorage().loadEntities();

global_context->getRefreshSet().setRefreshesStopped(false);
}

programs/server/config.xml

@@ -1399,6 +1399,10 @@
If not specified they will be stored locally. -->
<!-- <user_defined_zookeeper_path>/clickhouse/user_defined</user_defined_zookeeper_path> -->

+<!-- Path in ZooKeeper to store workloads and resources created by the commands CREATE WORKLOAD and CREATE RESOURCE.
+     If not specified they will be stored locally. -->
+<!-- <workload_zookeeper_path>/clickhouse/workload/definitions.sql</workload_zookeeper_path> -->

<!-- Uncomment if you want data to be compressed 30-100% better.
Don't do that if you just started using ClickHouse.
-->
src/Access/Common/AccessType.h

@@ -99,6 +99,8 @@ enum class AccessType : uint8_t
M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables
with arbitrary table engine */\
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
+M(CREATE_WORKLOAD, "", GLOBAL, CREATE) /* allows to execute CREATE WORKLOAD */ \
+M(CREATE_RESOURCE, "", GLOBAL, CREATE) /* allows to execute CREATE RESOURCE */ \
M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\

@@ -108,6 +110,8 @@ enum class AccessType : uint8_t
implicitly enabled by the grant DROP_TABLE */\
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\
+M(DROP_WORKLOAD, "", GLOBAL, DROP) /* allows to execute DROP WORKLOAD */\
+M(DROP_RESOURCE, "", GLOBAL, DROP) /* allows to execute DROP RESOURCE */\
M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute DROP NAMED COLLECTION */\
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
\
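For context, the new access types plug into `GRANT` in the usual way; a hedged sketch (the grantee name is hypothetical):

```sql
GRANT CREATE WORKLOAD, DROP WORKLOAD, CREATE RESOURCE, DROP RESOURCE ON *.* TO workload_admin
```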
src/Access/ContextAccess.cpp

@@ -701,15 +701,17 @@ bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlag

const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY;
const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION;
+const AccessFlags workload_ddl = AccessType::CREATE_WORKLOAD | AccessType::DROP_WORKLOAD;
+const AccessFlags resource_ddl = AccessType::CREATE_RESOURCE | AccessType::DROP_RESOURCE;
const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl;
const AccessFlags table_and_dictionary_and_function_ddl = table_ddl | dictionary_ddl | function_ddl;
const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE;
const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS;

-const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_and_function_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
+const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_and_function_ddl | workload_ddl | resource_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE;

-const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl;
+const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl | workload_ddl | resource_ddl;
const AccessFlags introspection_flags = AccessType::INTROSPECTION;
};
static const PrecalculatedFlags precalc;
src/CMakeLists.txt

@@ -136,6 +136,7 @@ add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
add_headers_and_sources(dbms Storages/ObjectStorage/Local)
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
add_headers_and_sources(dbms Common/NamedCollections)
+add_headers_and_sources(dbms Common/Scheduler/Workload)

if (TARGET ch_contrib::amqp_cpp)
add_headers_and_sources(dbms Storages/RabbitMQ)
src/Common/CurrentMetrics.cpp

@@ -183,8 +183,14 @@
M(BuildVectorSimilarityIndexThreadsScheduled, "Number of queued or active jobs in the build vector similarity index thread pool.") \
\
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
M(DiskPlainRewritableAzureFileCount, "Number of file entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
M(DiskPlainRewritableAzureUniqueFileNamesCount, "Number of unique file name entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
M(DiskPlainRewritableLocalFileCount, "Number of file entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
M(DiskPlainRewritableLocalUniqueFileNamesCount, "Number of unique file name entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
M(DiskPlainRewritableS3DirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for S3ObjectStorage.") \
M(DiskPlainRewritableS3FileCount, "Number of file entries in the 'plain_rewritable' in-memory map for S3ObjectStorage.") \
M(DiskPlainRewritableS3UniqueFileNamesCount, "Number of unique file name entries in the 'plain_rewritable' in-memory map for S3ObjectStorage.") \
\
M(MergeTreePartsLoaderThreads, "Number of threads in the MergeTree parts loader thread pool.") \
M(MergeTreePartsLoaderThreadsActive, "Number of threads in the MergeTree parts loader thread pool running a task.") \
src/Common/Priority.h

@@ -6,6 +6,7 @@
/// Separate type (rather than `Int64`) is used just to avoid implicit conversion errors and to default-initialize
struct Priority
{
-Int64 value = 0; /// Note that lower value means higher priority.
-constexpr operator Int64() const { return value; } /// NOLINT
+using Value = Int64;
+Value value = 0; /// Note that lower value means higher priority.
+constexpr operator Value() const { return value; } /// NOLINT
};
src/Common/Scheduler/IResourceManager.h

@@ -26,6 +26,9 @@ class IClassifier : private boost::noncopyable
public:
virtual ~IClassifier() = default;

+/// Returns true iff resource access is allowed by this classifier
+virtual bool has(const String & resource_name) = 0;
+
/// Returns ResourceLink that should be used to access resource.
/// Returned link is valid until classifier destruction.
virtual ResourceLink get(const String & resource_name) = 0;

@@ -46,12 +49,15 @@ public:
/// Initialize or reconfigure manager.
virtual void updateConfiguration(const Poco::Util::AbstractConfiguration & config) = 0;

+/// Returns true iff given resource is controlled through this manager.
+virtual bool hasResource(const String & resource_name) const = 0;
+
/// Obtain a classifier instance required to get access to resources.
/// Note that it holds resource configuration, so should be destructed when query is done.
virtual ClassifierPtr acquire(const String & classifier_name) = 0;

/// For introspection, see `system.scheduler` table
-using VisitorFunc = std::function<void(const String & resource, const String & path, const String & type, const SchedulerNodePtr & node)>;
+using VisitorFunc = std::function<void(const String & resource, const String & path, ISchedulerNode * node)>;
virtual void forEachNode(VisitorFunc visitor) = 0;
};

src/Common/Scheduler/ISchedulerConstraint.h

@@ -15,8 +15,7 @@ namespace DB
* When constraint is again satisfied, scheduleActivation() is called from finishRequest().
*
* Derived class behaviour requirements:
-* - dequeueRequest() must fill `request->constraint` iff it is nullptr;
-* - finishRequest() must be recursive: call to `parent_constraint->finishRequest()`.
+* - dequeueRequest() must call `request->addConstraint()`.
*/
class ISchedulerConstraint : public ISchedulerNode
{

@@ -25,34 +24,16 @@ public:
: ISchedulerNode(event_queue_, config, config_prefix)
{}

+ISchedulerConstraint(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
+: ISchedulerNode(event_queue_, info_)
+{}
+
/// Resource consumption by `request` is finished.
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual void finishRequest(ResourceRequest * request) = 0;

-void setParent(ISchedulerNode * parent_) override
-{
-ISchedulerNode::setParent(parent_);
-// Assign `parent_constraint` to the nearest parent derived from ISchedulerConstraint
-for (ISchedulerNode * node = parent_; node != nullptr; node = node->parent)
-{
-if (auto * constraint = dynamic_cast<ISchedulerConstraint *>(node))
-{
-parent_constraint = constraint;
-break;
-}
-}
-}
-
/// For introspection of current state (true = satisfied, false = violated)
virtual bool isSatisfied() = 0;

-protected:
-// Reference to nearest parent that is also derived from ISchedulerConstraint.
-// Request can traverse through multiple constraints while being dequeue from hierarchy,
-// while finishing request should traverse the same chain in reverse order.
-// NOTE: it must be immutable after initialization, because it is accessed in not thread-safe way from finishRequest()
-ISchedulerConstraint * parent_constraint = nullptr;
};

}
src/Common/Scheduler/ISchedulerNode.h

@@ -57,7 +57,13 @@ struct SchedulerNodeInfo

SchedulerNodeInfo() = default;

-explicit SchedulerNodeInfo(const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
+explicit SchedulerNodeInfo(double weight_, Priority priority_ = {})
+{
+setWeight(weight_);
+setPriority(priority_);
+}
+
+explicit SchedulerNodeInfo(const Poco::Util::AbstractConfiguration & config, const String & config_prefix = {})
{
setWeight(config.getDouble(config_prefix + ".weight", weight));
setPriority(config.getInt64(config_prefix + ".priority", priority));

@@ -68,7 +74,7 @@ struct SchedulerNodeInfo
if (value <= 0 || !isfinite(value))
throw Exception(
ErrorCodes::INVALID_SCHEDULER_NODE,
-"Negative and non-finite node weights are not allowed: {}",
+"Zero, negative and non-finite node weights are not allowed: {}",
value);
weight = value;
}

@@ -78,6 +84,11 @@ struct SchedulerNodeInfo
priority.value = value;
}

+void setPriority(Priority value)
+{
+priority = value;
+}
+
// To check if configuration update required
bool equals(const SchedulerNodeInfo & o) const
{

@@ -123,7 +134,14 @@ public:
, info(config, config_prefix)
{}

-virtual ~ISchedulerNode() = default;
+ISchedulerNode(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
+: event_queue(event_queue_)
+, info(info_)
+{}
+
+virtual ~ISchedulerNode();

virtual const String & getTypeName() const = 0;

/// Checks if two nodes configuration is equal
virtual bool equals(ISchedulerNode * other)

@@ -134,10 +152,11 @@ public:
/// Attach new child
virtual void attachChild(const std::shared_ptr<ISchedulerNode> & child) = 0;

-/// Detach and destroy child
+/// Detach child
+/// NOTE: child might be destroyed if the only reference was stored in parent
virtual void removeChild(ISchedulerNode * child) = 0;

-/// Get attached child by name
+/// Get attached child by name (for tests only)
virtual ISchedulerNode * getChild(const String & child_name) = 0;

/// Activation of child due to the first pending request

@@ -147,7 +166,7 @@ public:
/// Returns true iff node is active
virtual bool isActive() = 0;

-/// Returns number of active children
+/// Returns number of active children (for introspection only).
virtual size_t activeChildren() = 0;

/// Returns the first request to be executed as the first component of resulting pair.

@@ -155,10 +174,10 @@ public:
virtual std::pair<ResourceRequest *, bool> dequeueRequest() = 0;

/// Returns full path string using names of every parent
-String getPath()
+String getPath() const
{
String result;
-ISchedulerNode * ptr = this;
+const ISchedulerNode * ptr = this;
while (ptr->parent)
{
result = "/" + ptr->basename + result;

@@ -168,10 +187,7 @@ public:
}

/// Attach to a parent (used by attachChild)
-virtual void setParent(ISchedulerNode * parent_)
-{
-parent = parent_;
-}
+void setParent(ISchedulerNode * parent_);

protected:
/// Notify parents about the first pending request or constraint becoming satisfied.

@@ -307,6 +323,15 @@ public:
pending.notify_one();
}

+/// Removes an activation from queue
+void cancelActivation(ISchedulerNode * node)
+{
+std::unique_lock lock{mutex};
+if (node->is_linked())
+activations.erase(activations.iterator_to(*node));
+node->activation_event_id = 0;
+}
+
/// Process single event if it exists
/// Note that postponing constraint are ignored, use it to empty the queue including postponed events on shutdown
/// Returns `true` iff event has been processed

@@ -471,6 +496,20 @@ private:
std::atomic<TimePoint> manual_time{TimePoint()}; // for tests only
};

+inline ISchedulerNode::~ISchedulerNode()
+{
+// Make sure there is no dangling reference in activations queue
+event_queue->cancelActivation(this);
+}
+
+inline void ISchedulerNode::setParent(ISchedulerNode * parent_)
+{
+parent = parent_;
+// Avoid activation of a detached node
+if (parent == nullptr)
+event_queue->cancelActivation(this);
+}
+
inline void ISchedulerNode::scheduleActivation()
{
if (likely(parent))
src/Common/Scheduler/ISchedulerQueue.h

@@ -21,6 +21,10 @@ public:
: ISchedulerNode(event_queue_, config, config_prefix)
{}

+ISchedulerQueue(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
+: ISchedulerNode(event_queue_, info_)
+{}
+
// Wrapper for `enqueueRequest()` that should be used to account for available resource budget
// Returns `estimated_cost` that should be passed later to `adjustBudget()`
[[ nodiscard ]] ResourceCost enqueueRequestUsingBudget(ResourceRequest * request)

@@ -47,6 +51,11 @@ public:
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual bool cancelRequest(ResourceRequest * request) = 0;

+/// Fails all the resource requests in queue and marks this queue as not usable.
+/// Afterwards any new request will be failed on `enqueueRequest()`.
+/// NOTE: This is done for queues that are about to be destructed.
+virtual void purgeQueue() = 0;
+
/// For introspection
ResourceCost getBudget() const
{
src/Common/Scheduler/Nodes/ClassifiersConfig.cpp

@@ -5,11 +5,6 @@
namespace DB
{

-namespace ErrorCodes
-{
-extern const int RESOURCE_NOT_FOUND;
-}
-
ClassifierDescription::ClassifierDescription(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
Poco::Util::AbstractConfiguration::Keys keys;

@@ -31,9 +26,11 @@ ClassifiersConfig::ClassifiersConfig(const Poco::Util::AbstractConfiguration & c

const ClassifierDescription & ClassifiersConfig::get(const String & classifier_name)
{
+static ClassifierDescription empty;
if (auto it = classifiers.find(classifier_name); it != classifiers.end())
return it->second;
-throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unknown workload classifier '{}' to access resources", classifier_name);
+else
+return empty;
}

}
src/Common/Scheduler/Nodes/ClassifiersConfig.h

@@ -10,6 +10,7 @@ namespace DB
/// Mapping of resource name into path string (e.g. "disk1" -> "/path/to/class")
struct ClassifierDescription : std::unordered_map<String, String>
{
+ClassifierDescription() = default;
ClassifierDescription(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
};

src/Common/Scheduler/Nodes/CustomResourceManager.cpp (renamed from DynamicResourceManager.cpp)

@@ -1,7 +1,6 @@
-#include <Common/Scheduler/Nodes/DynamicResourceManager.h>
+#include <Common/Scheduler/Nodes/CustomResourceManager.h>

#include <Common/Scheduler/Nodes/SchedulerNodeFactory.h>
-#include <Common/Scheduler/ResourceManagerFactory.h>
#include <Common/Scheduler/ISchedulerQueue.h>

#include <Common/Exception.h>

@@ -21,7 +20,7 @@ namespace ErrorCodes
extern const int INVALID_SCHEDULER_NODE;
}

-DynamicResourceManager::State::State(EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config)
+CustomResourceManager::State::State(EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config)
: classifiers(config)
{
Poco::Util::AbstractConfiguration::Keys keys;

@@ -35,7 +34,7 @@ DynamicResourceManager::State::State(EventQueue * event_queue, const Poco::Util:
}
}

-DynamicResourceManager::State::Resource::Resource(
+CustomResourceManager::State::Resource::Resource(
const String & name,
EventQueue * event_queue,
const Poco::Util::AbstractConfiguration & config,

@@ -92,7 +91,7 @@ DynamicResourceManager::State::Resource::Resource(
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "undefined root node path '/' for resource '{}'", name);
}

-DynamicResourceManager::State::Resource::~Resource()
+CustomResourceManager::State::Resource::~Resource()
{
// NOTE: we should rely on `attached_to` and cannot use `parent`,
// NOTE: because `parent` can be `nullptr` in case attachment is still in event queue

@@ -106,14 +105,14 @@ DynamicResourceManager::State::Resource::~Resource()
}
}

-DynamicResourceManager::State::Node::Node(const String & name, EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
+CustomResourceManager::State::Node::Node(const String & name, EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
: type(config.getString(config_prefix + ".type", "fifo"))
, ptr(SchedulerNodeFactory::instance().get(type, event_queue, config, config_prefix))
{
ptr->basename = name;
}

-bool DynamicResourceManager::State::Resource::equals(const DynamicResourceManager::State::Resource & o) const
+bool CustomResourceManager::State::Resource::equals(const CustomResourceManager::State::Resource & o) const
{
if (nodes.size() != o.nodes.size())
return false;

@@ -130,14 +129,14 @@ bool DynamicResourceManager::State::Resource::equals(const DynamicResourceManage
return true;
}

-bool DynamicResourceManager::State::Node::equals(const DynamicResourceManager::State::Node & o) const
+bool CustomResourceManager::State::Node::equals(const CustomResourceManager::State::Node & o) const
{
if (type != o.type)
return false;
return ptr->equals(o.ptr.get());
}

-DynamicResourceManager::Classifier::Classifier(const DynamicResourceManager::StatePtr & state_, const String & classifier_name)
+CustomResourceManager::Classifier::Classifier(const CustomResourceManager::StatePtr & state_, const String & classifier_name)
: state(state_)
{
// State is immutable, but nodes are mutable and thread-safe

@@ -162,20 +161,25 @@ DynamicResourceManager::Classifier::Classifier(const DynamicResourceManager::Sta
}
}

-ResourceLink DynamicResourceManager::Classifier::get(const String & resource_name)
+bool CustomResourceManager::Classifier::has(const String & resource_name)
+{
+return resources.contains(resource_name);
+}
+
+ResourceLink CustomResourceManager::Classifier::get(const String & resource_name)
{
if (auto iter = resources.find(resource_name); iter != resources.end())
return iter->second;
throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Access denied to resource '{}'", resource_name);
}

-DynamicResourceManager::DynamicResourceManager()
+CustomResourceManager::CustomResourceManager()
: state(new State())
{
scheduler.start();
}

-void DynamicResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
+void CustomResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
{
StatePtr new_state = std::make_shared<State>(scheduler.event_queue, config);

@@ -217,7 +221,13 @@ void DynamicResourceManager::updateConfiguration(const Poco::Util::AbstractConfi
// NOTE: after mutex unlock `state` became available for Classifier(s) and must be immutable
}

-ClassifierPtr DynamicResourceManager::acquire(const String & classifier_name)
+bool CustomResourceManager::hasResource(const String & resource_name) const
+{
+std::lock_guard lock{mutex};
+return state->resources.contains(resource_name);
+}
+
+ClassifierPtr CustomResourceManager::acquire(const String & classifier_name)
{
// Acquire a reference to the current state
StatePtr state_ref;

@@ -229,7 +239,7 @@ ClassifierPtr DynamicResourceManager::acquire(const String & classifier_name)
return std::make_shared<Classifier>(state_ref, classifier_name);
}

-void DynamicResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
+void CustomResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
{
// Acquire a reference to the current state
StatePtr state_ref;

@@ -244,7 +254,7 @@ void DynamicResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
{
for (auto & [name, resource] : state_ref->resources)
for (auto & [path, node] : resource->nodes)
-visitor(name, path, node.type, node.ptr);
+visitor(name, path, node.ptr.get());
promise.set_value();
});

@@ -252,9 +262,4 @@ void DynamicResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
future.get();
}

-void registerDynamicResourceManager(ResourceManagerFactory & factory)
-{
-factory.registerMethod<DynamicResourceManager>("dynamic");
-}
-
}
src/Common/Scheduler/Nodes/CustomResourceManager.h (renamed from DynamicResourceManager.h)

@@ -10,7 +10,9 @@ namespace DB
{

/*
-* Implementation of `IResourceManager` supporting arbitrary dynamic hierarchy of scheduler nodes.
+* Implementation of `IResourceManager` supporting arbitrary hierarchy of scheduler nodes.
+* Scheduling hierarchies for every resource is described through server xml or yaml configuration.
+* Configuration could be changed dynamically without server restart.
* All resources are controlled by single root `SchedulerRoot`.
*
* State of manager is set of resources attached to the scheduler. States are referenced by classifiers.

@@ -24,11 +26,12 @@ namespace DB
* violation will apply to fairness. Old version exists as long as there is at least one classifier
* instance referencing it. Classifiers are typically attached to queries and will be destructed with them.
*/
-class DynamicResourceManager : public IResourceManager
+class CustomResourceManager : public IResourceManager
{
public:
-DynamicResourceManager();
+CustomResourceManager();
void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override;
+bool hasResource(const String & resource_name) const override;
ClassifierPtr acquire(const String & classifier_name) override;
void forEachNode(VisitorFunc visitor) override;

@@ -79,6 +82,7 @@ private:
{
public:
Classifier(const StatePtr & state_, const String & classifier_name);
+bool has(const String & resource_name) override;
ResourceLink get(const String & resource_name) override;
private:
std::unordered_map<String, ResourceLink> resources; // accessible resources by names

@@ -86,7 +90,7 @@ private:
};

SchedulerRoot scheduler;
-std::mutex mutex;
+mutable std::mutex mutex;
StatePtr state;
};
src/Common/Scheduler/Nodes/FairPolicy.h

@@ -28,7 +28,7 @@ namespace ErrorCodes
* of a child is set to vruntime of "start" of the last request. This guarantees immediate processing
* of at least single request of newly activated children and thus best isolation and scheduling latency.
*/
-class FairPolicy : public ISchedulerNode
+class FairPolicy final : public ISchedulerNode
{
/// Scheduling state of a child
struct Item

@@ -48,6 +48,23 @@ public:
: ISchedulerNode(event_queue_, config, config_prefix)
{}

+FairPolicy(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
+: ISchedulerNode(event_queue_, info_)
+{}
+
+~FairPolicy() override
+{
+// We need to clear `parent` in all children to avoid dangling references
+while (!children.empty())
+removeChild(children.begin()->second.get());
+}
+
+const String & getTypeName() const override
+{
+static String type_name("fair");
+return type_name;
+}
+
bool equals(ISchedulerNode * other) override
{
if (!ISchedulerNode::equals(other))
src/Common/Scheduler/Nodes/FifoQueue.h

@@ -23,13 +23,28 @@ namespace ErrorCodes
/*
* FIFO queue to hold pending resource requests
*/
-class FifoQueue : public ISchedulerQueue
+class FifoQueue final : public ISchedulerQueue
{
public:
FifoQueue(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
: ISchedulerQueue(event_queue_, config, config_prefix)
{}

+FifoQueue(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
+: ISchedulerQueue(event_queue_, info_)
+{}
+
+~FifoQueue() override
+{
+purgeQueue();
+}
+
+const String & getTypeName() const override
+{
+static String type_name("fifo");
+return type_name;
+}
+
bool equals(ISchedulerNode * other) override
{
if (!ISchedulerNode::equals(other))

@@ -42,6 +57,8 @@ public:
void enqueueRequest(ResourceRequest * request) override
{
std::lock_guard lock(mutex);
+if (is_not_usable)
+throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Scheduler queue is about to be destructed");
queue_cost += request->cost;
bool was_empty = requests.empty();
requests.push_back(*request);

@@ -66,6 +83,8 @@ public:
bool cancelRequest(ResourceRequest * request) override
{
std::lock_guard lock(mutex);
+if (is_not_usable)
+return false; // Any request should already be failed or executed
if (request->is_linked())
{
// It's impossible to check that `request` is indeed inserted to this queue and not another queue.

@@ -88,6 +107,19 @@ public:
return false;
}

+void purgeQueue() override
+{
+std::lock_guard lock(mutex);
+is_not_usable = true;
+while (!requests.empty())
+{
+ResourceRequest * request = &requests.front();
+requests.pop_front();
+request->failed(std::make_exception_ptr(
+Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Scheduler queue with resource request is about to be destructed")));
+}
+}
+
bool isActive() override
{
std::lock_guard lock(mutex);

@@ -131,6 +163,7 @@ private:
std::mutex mutex;
Int64 queue_cost = 0;
boost::intrusive::list<ResourceRequest> requests;
+bool is_not_usable = false;
};

}
532 src/Common/Scheduler/Nodes/IOResourceManager.cpp Normal file

@@ -0,0 +1,532 @@
|
||||
#include <Common/Scheduler/Nodes/IOResourceManager.h>
|
||||
|
||||
#include <Common/Scheduler/Nodes/FifoQueue.h>
|
||||
#include <Common/Scheduler/Nodes/FairPolicy.h>
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/Priority.h>
|
||||
|
||||
#include <Parsers/ASTCreateWorkloadQuery.h>
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int RESOURCE_NOT_FOUND;
|
||||
extern const int INVALID_SCHEDULER_NODE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
String getEntityName(const ASTPtr & ast)
|
||||
{
|
||||
if (auto * create = typeid_cast<ASTCreateWorkloadQuery *>(ast.get()))
|
||||
return create->getWorkloadName();
|
||||
if (auto * create = typeid_cast<ASTCreateResourceQuery *>(ast.get()))
|
||||
return create->getResourceName();
|
||||
return "unknown-workload-entity";
|
||||
}
|
||||
}
|
||||
|
||||
IOResourceManager::NodeInfo::NodeInfo(const ASTPtr & ast, const String & resource_name)
|
||||
{
|
||||
auto * create = assert_cast<ASTCreateWorkloadQuery *>(ast.get());
|
||||
name = create->getWorkloadName();
|
||||
parent = create->getWorkloadParent();
|
||||
settings.updateFromChanges(create->changes, resource_name);
|
||||
}
|
||||
|
||||
IOResourceManager::Resource::Resource(const ASTPtr & resource_entity_)
|
||||
: resource_entity(resource_entity_)
|
||||
, resource_name(getEntityName(resource_entity))
|
||||
{
|
||||
scheduler.start();
|
||||
}
|
||||
|
||||
IOResourceManager::Resource::~Resource()
|
||||
{
|
||||
scheduler.stop();
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::createNode(const NodeInfo & info)
|
||||
{
|
||||
if (info.name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Workload must have a name in resource '{}'",
|
||||
resource_name);
|
||||
|
||||
if (info.name == info.parent)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Self-referencing workload '{}' is not allowed in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
if (node_for_workload.contains(info.name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node for creating workload '{}' already exist in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
if (!info.parent.empty() && !node_for_workload.contains(info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parent node '{}' for creating workload '{}' does not exist in resource '{}'",
|
||||
info.parent, info.name, resource_name);
|
||||
|
||||
if (info.parent.empty() && root_node)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The second root workload '{}' is not allowed (current root '{}') in resource '{}'",
|
||||
info.name, root_node->basename, resource_name);
|
||||
|
||||
executeInSchedulerThread([&, this]
|
||||
{
|
||||
auto node = std::make_shared<UnifiedSchedulerNode>(scheduler.event_queue, info.settings);
|
||||
node->basename = info.name;
|
||||
if (!info.parent.empty())
|
||||
node_for_workload[info.parent]->attachUnifiedChild(node);
|
||||
else
|
||||
{
|
||||
root_node = node;
|
||||
scheduler.attachChild(root_node);
|
||||
}
|
||||
node_for_workload[info.name] = node;
|
||||
|
||||
updateCurrentVersion();
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::deleteNode(const NodeInfo & info)
|
||||
{
|
||||
if (!node_for_workload.contains(info.name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node for removing workload '{}' does not exist in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
if (!info.parent.empty() && !node_for_workload.contains(info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parent node '{}' for removing workload '{}' does not exist in resource '{}'",
|
||||
info.parent, info.name, resource_name);
|
||||
|
||||
auto node = node_for_workload[info.name];
|
||||
|
||||
if (node->hasUnifiedChildren())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Removing workload '{}' with children in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
executeInSchedulerThread([&]
|
||||
{
|
||||
if (!info.parent.empty())
|
||||
node_for_workload[info.parent]->detachUnifiedChild(node);
|
||||
else
|
||||
{
|
||||
chassert(node == root_node);
|
||||
scheduler.removeChild(root_node.get());
|
||||
root_node.reset();
|
||||
}
|
||||
|
||||
node_for_workload.erase(info.name);
|
||||
|
||||
updateCurrentVersion();
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::updateNode(const NodeInfo & old_info, const NodeInfo & new_info)
|
||||
{
|
||||
if (old_info.name != new_info.name)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Updating a name of workload '{}' to '{}' is not allowed in resource '{}'",
|
||||
old_info.name, new_info.name, resource_name);
|
||||
|
||||
if (old_info.parent != new_info.parent && (old_info.parent.empty() || new_info.parent.empty()))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Workload '{}' invalid update of parent from '{}' to '{}' in resource '{}'",
|
||||
old_info.name, old_info.parent, new_info.parent, resource_name);
|
||||
|
||||
if (!node_for_workload.contains(old_info.name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node for updating workload '{}' does not exist in resource '{}'",
|
||||
old_info.name, resource_name);
|
||||
|
||||
if (!old_info.parent.empty() && !node_for_workload.contains(old_info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Old parent node '{}' for updating workload '{}' does not exist in resource '{}'",
|
||||
old_info.parent, old_info.name, resource_name);
|
||||
|
||||
if (!new_info.parent.empty() && !node_for_workload.contains(new_info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "New parent node '{}' for updating workload '{}' does not exist in resource '{}'",
|
||||
new_info.parent, new_info.name, resource_name);
|
||||
|
||||
executeInSchedulerThread([&, this]
|
||||
{
|
||||
auto node = node_for_workload[old_info.name];
|
||||
bool detached = false;
|
||||
if (UnifiedSchedulerNode::updateRequiresDetach(old_info.parent, new_info.parent, old_info.settings, new_info.settings))
|
||||
{
|
||||
if (!old_info.parent.empty())
|
||||
node_for_workload[old_info.parent]->detachUnifiedChild(node);
|
||||
detached = true;
|
||||
}
|
||||
|
||||
node->updateSchedulingSettings(new_info.settings);
|
||||
|
||||
if (detached)
|
||||
{
|
||||
if (!new_info.parent.empty())
|
||||
node_for_workload[new_info.parent]->attachUnifiedChild(node);
|
||||
}
|
||||
updateCurrentVersion();
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::updateCurrentVersion()
|
||||
{
|
||||
auto previous_version = current_version;
|
||||
|
||||
// Create a full list of constraints and queues in the current hierarchy
|
||||
current_version = std::make_shared<Version>();
|
||||
if (root_node)
|
||||
root_node->addRawPointerNodes(current_version->nodes);
|
||||
|
||||
// See details in version control section of description in IOResourceManager.h
|
||||
if (previous_version)
|
||||
{
|
||||
previous_version->newer_version = current_version;
|
||||
previous_version.reset(); // Destroys previous version nodes if there are no classifiers referencing it
|
||||
}
|
||||
}
|
||||
|
||||
IOResourceManager::Workload::Workload(IOResourceManager * resource_manager_, const ASTPtr & workload_entity_)
|
||||
: resource_manager(resource_manager_)
|
||||
, workload_entity(workload_entity_)
|
||||
{
|
||||
try
|
||||
{
|
||||
for (auto & [resource_name, resource] : resource_manager->resources)
|
||||
resource->createNode(NodeInfo(workload_entity, resource_name));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error in IOResourceManager: {}",
|
||||
getCurrentExceptionMessage(/* with_stacktrace = */ true));
|
||||
}
|
||||
}
|
||||
|
||||
IOResourceManager::Workload::~Workload()
|
||||
{
|
||||
try
|
||||
{
|
||||
for (auto & [resource_name, resource] : resource_manager->resources)
|
||||
resource->deleteNode(NodeInfo(workload_entity, resource_name));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error in IOResourceManager: {}",
|
||||
getCurrentExceptionMessage(/* with_stacktrace = */ true));
|
||||
}
|
||||
}
|
||||
|
||||
void IOResourceManager::Workload::updateWorkload(const ASTPtr & new_entity)
|
||||
{
|
||||
try
|
||||
{
|
||||
for (auto & [resource_name, resource] : resource_manager->resources)
|
||||
resource->updateNode(NodeInfo(workload_entity, resource_name), NodeInfo(new_entity, resource_name));
|
||||
workload_entity = new_entity;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error in IOResourceManager: {}",
|
||||
getCurrentExceptionMessage(/* with_stacktrace = */ true));
|
||||
}
|
||||
}
|
||||
|
||||
String IOResourceManager::Workload::getParent() const
|
||||
{
|
||||
return assert_cast<ASTCreateWorkloadQuery *>(workload_entity.get())->getWorkloadParent();
|
||||
}
|
||||
|
||||
IOResourceManager::IOResourceManager(IWorkloadEntityStorage & storage_)
|
||||
: storage(storage_)
|
||||
, log{getLogger("IOResourceManager")}
|
||||
{
|
||||
subscription = storage.getAllEntitiesAndSubscribe(
|
||||
[this] (const std::vector<IWorkloadEntityStorage::Event> & events)
|
||||
{
|
||||
for (const auto & [entity_type, entity_name, entity] : events)
|
||||
{
|
||||
switch (entity_type)
|
||||
{
|
||||
case WorkloadEntityType::Workload:
|
||||
{
|
||||
if (entity)
|
||||
createOrUpdateWorkload(entity_name, entity);
|
||||
else
|
||||
deleteWorkload(entity_name);
|
||||
break;
|
||||
}
|
||||
case WorkloadEntityType::Resource:
|
||||
{
|
||||
if (entity)
|
||||
createOrUpdateResource(entity_name, entity);
|
||||
else
|
||||
deleteResource(entity_name);
|
||||
break;
|
||||
}
|
||||
case WorkloadEntityType::MAX: break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
IOResourceManager::~IOResourceManager()
|
||||
{
|
||||
subscription.reset();
|
||||
resources.clear();
|
||||
workloads.clear();
|
||||
}
|
||||
|
||||
void IOResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration &)
|
||||
{
|
||||
// No-op
|
||||
}
|
||||
|
||||
void IOResourceManager::createOrUpdateWorkload(const String & workload_name, const ASTPtr & ast)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto workload_iter = workloads.find(workload_name); workload_iter != workloads.end())
|
||||
workload_iter->second->updateWorkload(ast);
|
||||
else
|
||||
workloads.emplace(workload_name, std::make_shared<Workload>(this, ast));
|
||||
}
|
||||
|
||||
void IOResourceManager::deleteWorkload(const String & workload_name)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto workload_iter = workloads.find(workload_name); workload_iter != workloads.end())
|
||||
{
|
||||
// Note that we rely of the fact that workload entity storage will not drop workload that is used as a parent
|
||||
workloads.erase(workload_iter);
|
||||
}
|
||||
else // Workload to be deleted does not exist -- do nothing, throwing exceptions from a subscription is pointless
|
||||
LOG_ERROR(log, "Delete workload that doesn't exist: {}", workload_name);
|
||||
}
|
||||
|
||||
void IOResourceManager::createOrUpdateResource(const String & resource_name, const ASTPtr & ast)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto resource_iter = resources.find(resource_name); resource_iter != resources.end())
|
||||
resource_iter->second->updateResource(ast);
|
||||
else
|
||||
{
|
||||
// Add all workloads into the new resource
|
||||
auto resource = std::make_shared<Resource>(ast);
|
||||
for (Workload * workload : topologicallySortedWorkloads())
|
||||
resource->createNode(NodeInfo(workload->workload_entity, resource_name));
|
||||
|
||||
// Attach the resource
|
||||
resources.emplace(resource_name, resource);
|
||||
}
|
||||
}
|
||||
|
||||
void IOResourceManager::deleteResource(const String & resource_name)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto resource_iter = resources.find(resource_name); resource_iter != resources.end())
|
||||
{
|
||||
resources.erase(resource_iter);
|
||||
}
|
||||
else // Resource to be deleted does not exist -- do nothing, throwing exceptions from a subscription is pointless
|
||||
LOG_ERROR(log, "Delete resource that doesn't exist: {}", resource_name);
|
||||
}
|
||||
|
||||
IOResourceManager::Classifier::~Classifier()
|
||||
{
|
||||
// Detach classifier from all resources in parallel (executed in every scheduler thread)
|
||||
std::vector<std::future<void>> futures;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
futures.reserve(attachments.size());
|
||||
for (auto & [resource_name, attachment] : attachments)
|
||||
{
|
||||
futures.emplace_back(attachment.resource->detachClassifier(std::move(attachment.version)));
|
||||
attachment.link.reset(); // Just in case because it is not valid any longer
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for all tasks to finish (to avoid races in case of exceptions)
|
||||
for (auto & future : futures)
|
||||
future.wait();
|
||||
|
||||
// There should not be any exceptions because it just destruct few objects, but let's rethrow just in case
|
||||
for (auto & future : futures)
|
||||
future.get();
|
||||
|
||||
// This unreferences and probably destroys `Resource` objects.
|
||||
// NOTE: We cannot do it in the scheduler threads (because thread cannot join itself).
|
||||
attachments.clear();
|
||||
}
|
||||
|
||||
std::future<void> IOResourceManager::Resource::detachClassifier(VersionPtr && version)
|
||||
{
|
||||
auto detach_promise = std::make_shared<std::promise<void>>(); // event queue task is std::function, which requires copy semanticss
|
||||
auto future = detach_promise->get_future();
|
||||
scheduler.event_queue->enqueue([detached_version = std::move(version), promise = std::move(detach_promise)] mutable
|
||||
{
|
||||
try
|
||||
{
|
||||
// Unreferences and probably destroys the version and scheduler nodes it owns.
|
||||
// The main reason from moving destruction into the scheduler thread is to
|
||||
// free memory in the same thread it was allocated to avoid memtrackers drift.
|
||||
detached_version.reset();
|
||||
promise->set_value();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
promise->set_exception(std::current_exception());
|
||||
}
|
||||
});
|
||||
return future;
|
||||
}

bool IOResourceManager::Classifier::has(const String & resource_name)
{
    std::unique_lock lock{mutex};
    return attachments.contains(resource_name);
}

ResourceLink IOResourceManager::Classifier::get(const String & resource_name)
{
    std::unique_lock lock{mutex};
    if (auto iter = attachments.find(resource_name); iter != attachments.end())
        return iter->second.link;
    else
        throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Access denied to resource '{}'", resource_name);
}

void IOResourceManager::Classifier::attach(const ResourcePtr & resource, const VersionPtr & version, ResourceLink link)
{
    std::unique_lock lock{mutex};
    chassert(!attachments.contains(resource->getName()));
    attachments[resource->getName()] = Attachment{.resource = resource, .version = version, .link = link};
}

void IOResourceManager::Resource::updateResource(const ASTPtr & new_resource_entity)
{
    chassert(getEntityName(new_resource_entity) == resource_name);
    resource_entity = new_resource_entity;
}

std::future<void> IOResourceManager::Resource::attachClassifier(Classifier & classifier, const String & workload_name)
{
    auto attach_promise = std::make_shared<std::promise<void>>(); // event queue task is std::function, which requires copy semantics
    auto future = attach_promise->get_future();
    scheduler.event_queue->enqueue([&, this, promise = std::move(attach_promise)]
    {
        try
        {
            if (auto iter = node_for_workload.find(workload_name); iter != node_for_workload.end())
            {
                auto queue = iter->second->getQueue();
                if (!queue)
                    throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unable to use workload '{}' that has children for resource '{}'",
                        workload_name, resource_name);
                classifier.attach(shared_from_this(), current_version, ResourceLink{.queue = queue.get()});
            }
            else
            {
                // This resource does not have the specified workload. It is either unknown or managed by another resource manager.
                // We leave this resource not attached to the classifier. "Access denied" will be thrown later on `classifier->get(resource_name)`.
            }
            promise->set_value();
        }
        catch (...)
        {
            promise->set_exception(std::current_exception());
        }
    });
    return future;
}

bool IOResourceManager::hasResource(const String & resource_name) const
{
    std::unique_lock lock{mutex};
    return resources.contains(resource_name);
}

ClassifierPtr IOResourceManager::acquire(const String & workload_name)
{
    auto classifier = std::make_shared<Classifier>();

    // Attach classifier to all resources in parallel (executed in every scheduler thread)
    std::vector<std::future<void>> futures;
    {
        std::unique_lock lock{mutex};
        futures.reserve(resources.size());
        for (auto & [resource_name, resource] : resources)
            futures.emplace_back(resource->attachClassifier(*classifier, workload_name));
    }

    // Wait for all tasks to finish (to avoid races in case of exceptions)
    for (auto & future : futures)
        future.wait();

    // Rethrow exceptions if any
    for (auto & future : futures)
        future.get();

    return classifier;
}
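
// Illustrative usage sketch (an assumption added for documentation, not part of this file):
// a query thread acquires a classifier for its workload and resolves resource links through it.
// The names "production" and "io_read" are made up for the example.
//
//     ClassifierPtr classifier = manager->acquire("production");
//     if (classifier->has("io_read"))
//     {
//         ResourceLink link = classifier->get("io_read"); // valid while `classifier` is held
//         // pass `link` to a ResourceGuard or enqueue a ResourceRequest on `link.queue`
//     }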

void IOResourceManager::Resource::forEachResourceNode(IResourceManager::VisitorFunc & visitor)
{
    executeInSchedulerThread([&, this]
    {
        for (auto & [path, node] : node_for_workload)
        {
            node->forEachSchedulerNode([&] (ISchedulerNode * scheduler_node)
            {
                visitor(resource_name, scheduler_node->getPath(), scheduler_node);
            });
        }
    });
}

void IOResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
{
    // Copy resources to avoid holding the mutex for a long time
    std::unordered_map<String, ResourcePtr> resources_copy;
    {
        std::unique_lock lock{mutex};
        resources_copy = resources;
    }

    /// Run tasks one by one to avoid concurrent calls to visitor
    for (auto & [resource_name, resource] : resources_copy)
        resource->forEachResourceNode(visitor);
}

void IOResourceManager::topologicallySortedWorkloadsImpl(Workload * workload, std::unordered_set<Workload *> & visited, std::vector<Workload *> & sorted_workloads)
{
    if (visited.contains(workload))
        return;
    visited.insert(workload);

    // Recurse into the parent (if any)
    String parent = workload->getParent();
    if (!parent.empty())
    {
        auto parent_iter = workloads.find(parent);
        chassert(parent_iter != workloads.end()); // validations check that all parents exist
        topologicallySortedWorkloadsImpl(parent_iter->second.get(), visited, sorted_workloads);
    }

    sorted_workloads.push_back(workload);
}

std::vector<IOResourceManager::Workload *> IOResourceManager::topologicallySortedWorkloads()
{
    std::vector<Workload *> sorted_workloads;
    std::unordered_set<Workload *> visited;
    for (auto & [workload_name, workload] : workloads)
        topologicallySortedWorkloadsImpl(workload.get(), visited, sorted_workloads);
    return sorted_workloads;
}
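
// Worked example (illustrative): for a hierarchy where workload `production` has parent `all`,
// the DFS above visits the parent first, so the result lists parents before children,
// e.g. {all, production, development}; the relative order of siblings is unspecified.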

}
281
src/Common/Scheduler/Nodes/IOResourceManager.h
Normal file
@ -0,0 +1,281 @@
#pragma once

#include <base/defines.h>
#include <base/scope_guard.h>

#include <Common/Logger.h>
#include <Common/Scheduler/SchedulingSettings.h>
#include <Common/Scheduler/IResourceManager.h>
#include <Common/Scheduler/SchedulerRoot.h>
#include <Common/Scheduler/Nodes/UnifiedSchedulerNode.h>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>

#include <Parsers/IAST_fwd.h>

#include <boost/core/noncopyable.hpp>

#include <exception>
#include <memory>
#include <mutex>
#include <future>
#include <unordered_set>

namespace DB
{

/*
 * Implementation of `IResourceManager` that creates a hierarchy of scheduler nodes according to
 * workload entities (WORKLOADs and RESOURCEs). It subscribes to updates in IWorkloadEntityStorage and
 * creates a hierarchy of UnifiedSchedulerNode identical to the hierarchy of WORKLOADs.
 * For every RESOURCE an independent hierarchy of scheduler nodes is created.
 *
 * The manager processes updates of WORKLOADs and RESOURCEs: CREATE/DROP/ALTER.
 * When a RESOURCE is created (dropped), a corresponding scheduler node hierarchy is created (destroyed).
 * After DROP RESOURCE parts of the hierarchy might be kept alive while at least one query uses them.
 *
 * The manager is specific to IO only because it creates scheduler node hierarchies for RESOURCEs having
 * WRITE DISK and/or READ DISK definitions. CPU and memory resources are managed separately.
 *
 * Classifiers are used (1) to access IO resources and (2) to keep shared ownership of scheduling nodes.
 * This allows `ResourceRequest` and `ResourceLink` to hold raw pointers as long as
 * `ClassifierPtr` is acquired and held.
 *
 * === RESOURCE ARCHITECTURE ===
 * Let's consider how a single resource is implemented. Every workload is represented by a corresponding UnifiedSchedulerNode.
 * Every UnifiedSchedulerNode manages its own subtree of ISchedulerNode objects (see details in UnifiedSchedulerNode.h).
 * A UnifiedSchedulerNode for a workload w/o children has a queue, which provides a ResourceLink for consumption.
 * The parent of the root workload for a resource is SchedulerRoot with its own scheduler thread.
 * So every resource has its dedicated thread for processing of resource requests and other events (see EventQueue).
 *
 * Here is an example of SQL and the corresponding hierarchy of scheduler nodes:
 *    CREATE RESOURCE my_io_resource (...)
 *    CREATE WORKLOAD all
 *    CREATE WORKLOAD production PARENT all
 *    CREATE WORKLOAD development PARENT all
 *
 *           root - SchedulerRoot (with scheduler thread and EventQueue)
 *            |
 *           all - UnifiedSchedulerNode
 *            |
 *        p0_fair - FairPolicy (part of parent UnifiedSchedulerNode internal structure)
 *          /   \
 * production   development - UnifiedSchedulerNode
 *     |            |
 *   queue        queue - FifoQueue (part of parent UnifiedSchedulerNode internal structure)
 *
 * === UPDATING WORKLOADS ===
 * A workload may be created, updated or deleted.
 * Updating a child of a workload might lead to updating other workloads:
 * 1. The workload itself: its structure depends on the settings of child workloads
 *    (e.g. the fifo node of a leaf workload is removed when the first child is added;
 *    and a fair node is inserted after the first two children are added).
 * 2. Other children: for them the path to the root might change (e.g. an intermediate priority node is inserted).
 *
 * === VERSION CONTROL ===
 * Versions are created on hierarchy updates and hold ownership of nodes that are used through raw pointers.
 * A classifier references a version of every resource it uses. Older versions reference newer versions.
 * Here is a diagram explaining version control based on Version objects (for 1 resource):
 *
 *       [nodes]      [nodes]      [nodes]
 *          ^            ^            ^
 *          |            |            |
 *       version1 --> version2 -...-> versionN
 *          ^            ^            ^
 *          |            |            |
 *  old_classifier  new_classifier  current_version
 *
 * A previous version should hold a reference to a newer version. It is required for proper handling of updates.
 * Classifiers that were created for any of the old versions may use nodes of a newer version due to updateNode().
 * It may move a queue to a new position in the hierarchy or create/destroy constraints, thus resource requests
 * created by an old classifier may reference constraints of newer versions through `request->constraints`, which
 * is filled during dequeueRequest().
 *
 * === THREADS ===
 * scheduler thread:
 *  - one thread per resource
 *  - uses event_queue (per resource) for processing w/o holding a mutex for every scheduler node
 *  - handles resource requests
 *  - node activations
 *  - scheduler hierarchy updates
 * query thread:
 *  - multiple independent threads
 *  - send resource requests
 *  - acquire and release classifiers (via scheduler event queues)
 * control thread:
 *  - modifies workloads and resources through subscription
 *
 * === SYNCHRONIZATION ===
 * List of related sync primitives and their roles:
 * IOResourceManager::mutex
 *  - protects resource manager data structures - resources and workloads
 *  - serializes control thread actions
 * IOResourceManager::Resource::scheduler->event_queue
 *  - serializes scheduler hierarchy events
 *  - events are created in control and query threads
 *  - all events are processed by the specific scheduler thread
 *  - hierarchy-wide actions: request dequeueing, activation propagation and node updates
 *  - resource version control management
 * FifoQueue::mutex and SemaphoreConstraint::mutex
 *  - serialize query and scheduler threads on specific node accesses
 *  - resource request processing: enqueueRequest(), dequeueRequest() and finishRequest()
 */
class IOResourceManager : public IResourceManager
{
public:
    explicit IOResourceManager(IWorkloadEntityStorage & storage_);
    ~IOResourceManager() override;
    void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override;
    bool hasResource(const String & resource_name) const override;
    ClassifierPtr acquire(const String & workload_name) override;
    void forEachNode(VisitorFunc visitor) override;

private:
    // Forward declarations
    struct NodeInfo;
    struct Version;
    class Resource;
    struct Workload;
    class Classifier;

    friend struct Workload;

    using VersionPtr = std::shared_ptr<Version>;
    using ResourcePtr = std::shared_ptr<Resource>;
    using WorkloadPtr = std::shared_ptr<Workload>;

    /// Helper for parsing the workload AST for a specific resource
    struct NodeInfo
    {
        String name; // Workload name
        String parent; // Name of the parent workload
        SchedulingSettings settings; // Settings specific for a given resource

        NodeInfo(const ASTPtr & ast, const String & resource_name);
    };

    /// Ownership control for scheduler nodes, which could be referenced by raw pointers
    struct Version
    {
        std::vector<SchedulerNodePtr> nodes;
        VersionPtr newer_version;
    };

    /// Holds a thread and the hierarchy of unified scheduler nodes for a specific RESOURCE
    class Resource : public std::enable_shared_from_this<Resource>, boost::noncopyable
    {
    public:
        explicit Resource(const ASTPtr & resource_entity_);
        ~Resource();

        const String & getName() const { return resource_name; }

        /// Hierarchy management
        void createNode(const NodeInfo & info);
        void deleteNode(const NodeInfo & info);
        void updateNode(const NodeInfo & old_info, const NodeInfo & new_info);

        /// Updates the resource entity
        void updateResource(const ASTPtr & new_resource_entity);

        /// Updates a classifier to contain a reference for the specified workload
        std::future<void> attachClassifier(Classifier & classifier, const String & workload_name);

        /// Removes the classifier reference. This destroys scheduler nodes in the proper scheduler thread
        std::future<void> detachClassifier(VersionPtr && version);

        /// Introspection
        void forEachResourceNode(IOResourceManager::VisitorFunc & visitor);

    private:
        void updateCurrentVersion();

        template <class Task>
        void executeInSchedulerThread(Task && task)
        {
            std::promise<void> promise;
            auto future = promise.get_future();
            scheduler.event_queue->enqueue([&]
            {
                try
                {
                    task();
                    promise.set_value();
                }
                catch (...)
                {
                    promise.set_exception(std::current_exception());
                }
            });
            future.get(); // Blocks until execution is done in the scheduler thread
        }

        ASTPtr resource_entity;
        const String resource_name;
        SchedulerRoot scheduler;

        // TODO(serxa): consider using resource_manager->mutex + scheduler thread for updates and mutex only for reading to avoid slow acquire/release of classifier
        /// These fields should be accessed only by the scheduler thread
        std::unordered_map<String, UnifiedSchedulerNodePtr> node_for_workload;
        UnifiedSchedulerNodePtr root_node;
        VersionPtr current_version;
    };

    struct Workload : boost::noncopyable
    {
        IOResourceManager * resource_manager;
        ASTPtr workload_entity;

        Workload(IOResourceManager * resource_manager_, const ASTPtr & workload_entity_);
        ~Workload();

        void updateWorkload(const ASTPtr & new_entity);
        String getParent() const;
    };

    class Classifier : public IClassifier
    {
    public:
        ~Classifier() override;

        /// Implements the IClassifier interface
        /// NOTE: It is called from query threads (possibly multiple)
        bool has(const String & resource_name) override;
        ResourceLink get(const String & resource_name) override;

        /// Attaches/detaches a specific resource
        /// NOTE: It is called from scheduler threads (possibly multiple)
        void attach(const ResourcePtr & resource, const VersionPtr & version, ResourceLink link);
        void detach(const ResourcePtr & resource);

    private:
        IOResourceManager * resource_manager;
        std::mutex mutex;
        struct Attachment
        {
            ResourcePtr resource;
            VersionPtr version;
            ResourceLink link;
        };
        std::unordered_map<String, Attachment> attachments; // TSA_GUARDED_BY(mutex);
    };

    void createOrUpdateWorkload(const String & workload_name, const ASTPtr & ast);
    void deleteWorkload(const String & workload_name);
    void createOrUpdateResource(const String & resource_name, const ASTPtr & ast);
    void deleteResource(const String & resource_name);

    // Topological sorting of workloads
    void topologicallySortedWorkloadsImpl(Workload * workload, std::unordered_set<Workload *> & visited, std::vector<Workload *> & sorted_workloads);
    std::vector<Workload *> topologicallySortedWorkloads();

    IWorkloadEntityStorage & storage;
    scope_guard subscription;

    mutable std::mutex mutex;
    std::unordered_map<String, WorkloadPtr> workloads; // TSA_GUARDED_BY(mutex);
    std::unordered_map<String, ResourcePtr> resources; // TSA_GUARDED_BY(mutex);

    LoggerPtr log;
};

}
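
// Illustrative construction sketch (an assumption for documentation: `storage` is some
// IWorkloadEntityStorage implementation and `config` is the server configuration),
// kept consistent with the interface declared above:
//
//     auto manager = std::make_shared<IOResourceManager>(storage);
//     manager->updateConfiguration(config);
//     ClassifierPtr classifier = manager->acquire("my_workload"); // per query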
@ -19,7 +19,7 @@ namespace ErrorCodes
 * Scheduler node that implements priority scheduling policy.
 * Requests are scheduled in order of priorities.
 */
class PriorityPolicy : public ISchedulerNode
class PriorityPolicy final : public ISchedulerNode
{
    /// Scheduling state of a child
    struct Item
@ -39,6 +39,23 @@ public:
        : ISchedulerNode(event_queue_, config, config_prefix)
    {}

    explicit PriorityPolicy(EventQueue * event_queue_, const SchedulerNodeInfo & node_info)
        : ISchedulerNode(event_queue_, node_info)
    {}

    ~PriorityPolicy() override
    {
        // We need to clear `parent` in all children to avoid dangling references
        while (!children.empty())
            removeChild(children.begin()->second.get());
    }

    const String & getTypeName() const override
    {
        static String type_name("priority");
        return type_name;
    }

    bool equals(ISchedulerNode * other) override
    {
        if (!ISchedulerNode::equals(other))
@ -1,5 +1,6 @@
#pragma once

#include "Common/Scheduler/ISchedulerNode.h"
#include <Common/Scheduler/ISchedulerConstraint.h>

#include <mutex>
@ -13,7 +14,7 @@ namespace DB
 * Limited concurrency constraint.
 * Blocks if either the number of concurrent in-flight requests exceeds `max_requests` or their total cost exceeds `max_cost`
 */
class SemaphoreConstraint : public ISchedulerConstraint
class SemaphoreConstraint final : public ISchedulerConstraint
{
    static constexpr Int64 default_max_requests = std::numeric_limits<Int64>::max();
    static constexpr Int64 default_max_cost = std::numeric_limits<Int64>::max();
@ -24,6 +25,25 @@ public:
        , max_cost(config.getInt64(config_prefix + ".max_cost", config.getInt64(config_prefix + ".max_bytes", default_max_cost)))
    {}

    SemaphoreConstraint(EventQueue * event_queue_, const SchedulerNodeInfo & info_, Int64 max_requests_, Int64 max_cost_)
        : ISchedulerConstraint(event_queue_, info_)
        , max_requests(max_requests_)
        , max_cost(max_cost_)
    {}

    ~SemaphoreConstraint() override
    {
        // We need to clear `parent` in the child to avoid dangling references
        if (child)
            removeChild(child.get());
    }

    const String & getTypeName() const override
    {
        static String type_name("inflight_limit");
        return type_name;
    }

    bool equals(ISchedulerNode * other) override
    {
        if (!ISchedulerNode::equals(other))
@ -68,15 +88,14 @@ public:
        if (!request)
            return {nullptr, false};

        // Request has a reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`.
        // The former is initialized here dynamically and the latter is initialized once during hierarchy construction.
        if (!request->constraint)
            request->constraint = this;

        // Update state on request arrival
        std::unique_lock lock(mutex);
        requests++;
        cost += request->cost;
        if (request->addConstraint(this))
        {
            // Update state on request arrival
            requests++;
            cost += request->cost;
        }

        child_active = child_now_active;
        if (!active())
            busy_periods++;
@ -86,10 +105,6 @@ public:
    void finishRequest(ResourceRequest * request) override
    {
        // Recursive traverse of parent flow controls in reverse order
        if (parent_constraint)
            parent_constraint->finishRequest(request);

        // Update state on request departure
        std::unique_lock lock(mutex);
        bool was_active = active();
@ -109,6 +124,32 @@ public:
            parent->activateChild(this);
    }

    /// Update limits.
    /// Should be called from the scheduler thread because it could lead to activation or deactivation
    void updateConstraints(const SchedulerNodePtr & self, Int64 new_max_requests, UInt64 new_max_cost)
    {
        std::unique_lock lock(mutex);
        bool was_active = active();
        max_requests = new_max_requests;
        max_cost = new_max_cost;

        if (parent)
        {
            // Activate on transition from inactive state
            if (!was_active && active())
                parent->activateChild(this);
            // Deactivate on transition into inactive state
            else if (was_active && !active())
            {
                // Node deactivation is usually done in dequeueRequest(), but we do not want to
                // do an extra call to active() on every request just to make sure there was no update().
                // There is no interface method to do deactivation, so we do the following trick.
                parent->removeChild(this);
                parent->attachChild(self); // This call is the only reason we have `recursive_mutex`
            }
        }
    }
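
    // Illustrative call-site sketch (an assumption, not used in this header): from outside
    // the scheduler thread the update must be funneled through the event queue, for example:
    //
    //     event_queue->enqueue([node /* SchedulerNodePtr that owns this constraint */]
    //     {
    //         static_cast<SemaphoreConstraint &>(*node).updateConstraints(node, /*max_requests*/ 10, /*max_cost*/ 1'000'000);
    //     });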

    bool isActive() override
    {
        std::unique_lock lock(mutex);
@ -150,10 +191,10 @@ private:
        return satisfied() && child_active;
    }

    const Int64 max_requests = default_max_requests;
    const Int64 max_cost = default_max_cost;
    Int64 max_requests = default_max_requests;
    Int64 max_cost = default_max_cost;

    std::mutex mutex;
    std::recursive_mutex mutex;
    Int64 requests = 0;
    Int64 cost = 0;
    bool child_active = false;

@ -3,8 +3,6 @@
#include <Common/Scheduler/ISchedulerConstraint.h>

#include <chrono>
#include <mutex>
#include <limits>
#include <utility>


@ -15,7 +13,7 @@ namespace DB
 * Limited throughput constraint. Blocks if the token-bucket constraint is violated:
 * i.e. more than `max_burst + duration * max_speed` cost units (aka tokens) have been dequeued from this node in the last `duration` seconds.
 */
class ThrottlerConstraint : public ISchedulerConstraint
class ThrottlerConstraint final : public ISchedulerConstraint
{
public:
    static constexpr double default_burst_seconds = 1.0;
@ -28,10 +26,28 @@ public:
        , tokens(max_burst)
    {}

    ThrottlerConstraint(EventQueue * event_queue_, const SchedulerNodeInfo & info_, double max_speed_, double max_burst_)
        : ISchedulerConstraint(event_queue_, info_)
        , max_speed(max_speed_)
        , max_burst(max_burst_)
        , last_update(event_queue_->now())
        , tokens(max_burst)
    {}

    ~ThrottlerConstraint() override
    {
        // We should cancel the event on destruction to avoid dangling references from the event queue
        event_queue->cancelPostponed(postponed);

        // We need to clear `parent` in the child to avoid a dangling reference
        if (child)
            removeChild(child.get());
    }

    const String & getTypeName() const override
    {
        static String type_name("bandwidth_limit");
        return type_name;
    }

    bool equals(ISchedulerNode * other) override
@ -78,10 +94,7 @@ public:
        if (!request)
            return {nullptr, false};

        // Request has a reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`.
        // The former is initialized here dynamically and the latter is initialized once during hierarchy construction.
        if (!request->constraint)
            request->constraint = this;
        // We don't do `request->addConstraint(this)` because `finishRequest()` is a no-op

        updateBucket(request->cost);

@ -92,12 +105,8 @@ public:
        return {request, active()};
    }

    void finishRequest(ResourceRequest * request) override
    void finishRequest(ResourceRequest *) override
    {
        // Recursive traverse of parent flow controls in reverse order
        if (parent_constraint)
            parent_constraint->finishRequest(request);

        // NOTE: Token-bucket constraint does not require any action when consumption ends
    }

@ -108,6 +117,21 @@ public:
            parent->activateChild(this);
    }

    /// Update limits.
    /// Should be called from the scheduler thread because it could lead to activation
    void updateConstraints(double new_max_speed, double new_max_burst)
    {
        event_queue->cancelPostponed(postponed);
        postponed = EventQueue::not_postponed;
        bool was_active = active();
        updateBucket(0, true); // To apply the previous params for the duration since `last_update`
        max_speed = new_max_speed;
        max_burst = new_max_burst;
        updateBucket(0, false); // To postpone (if needed) using the new params
        if (!was_active && active() && parent)
            parent->activateChild(this);
    }
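
    // Note (illustrative): the first updateBucket() call accrues tokens for the elapsed
    // interval at the old speed, so the parameter change never applies retroactively;
    // the second call only re-arms postponing under the new parameters.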

    bool isActive() override
    {
        return active();
@ -150,7 +174,7 @@ private:
            parent->activateChild(this);
    }

    void updateBucket(ResourceCost use = 0)
    void updateBucket(ResourceCost use = 0, bool do_not_postpone = false)
    {
        auto now = event_queue->now();
        if (max_speed > 0.0)
@ -160,7 +184,7 @@ private:
            tokens -= use; // This is done outside min() to avoid passing large requests w/o token consumption after a long idle period

            // Postpone activation until there is a positive amount of tokens
            if (tokens < 0.0)
            if (!do_not_postpone && tokens < 0.0)
            {
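                // Worked example: with max_speed = 100 tokens/s and tokens = -5,
                // the computed delay is 5 / 100 = 0.05 s, i.e. delay_ns = 50'000'000.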
                auto delay_ns = std::chrono::nanoseconds(static_cast<Int64>(-tokens / max_speed * 1e9));
                if (postponed == EventQueue::not_postponed)
@ -184,8 +208,8 @@ private:
        return satisfied() && child_active;
    }

    const double max_speed{0}; /// in tokens per second
    const double max_burst{0}; /// in tokens
    double max_speed{0}; /// in tokens per second
    double max_burst{0}; /// in tokens

    EventQueue::TimePoint last_update;
    UInt64 postponed = EventQueue::not_postponed;

606
src/Common/Scheduler/Nodes/UnifiedSchedulerNode.h
Normal file
@ -0,0 +1,606 @@
#pragma once

#include <Common/Priority.h>
#include <Common/Scheduler/Nodes/PriorityPolicy.h>
#include <Common/Scheduler/Nodes/FairPolicy.h>
#include <Common/Scheduler/Nodes/ThrottlerConstraint.h>
#include <Common/Scheduler/Nodes/SemaphoreConstraint.h>
#include <Common/Scheduler/ISchedulerQueue.h>
#include <Common/Scheduler/Nodes/FifoQueue.h>
#include <Common/Scheduler/ISchedulerNode.h>
#include <Common/Scheduler/SchedulingSettings.h>
#include <Common/Exception.h>

#include <memory>
#include <unordered_map>

namespace DB
{

namespace ErrorCodes
{
    extern const int INVALID_SCHEDULER_NODE;
    extern const int LOGICAL_ERROR;
}

class UnifiedSchedulerNode;
using UnifiedSchedulerNodePtr = std::shared_ptr<UnifiedSchedulerNode>;

/*
 * Unified scheduler node combines multiple nodes internally to provide all available scheduling policies and constraints.
 * The whole scheduling hierarchy could "logically" consist of unified nodes only. Physically, intermediate "internal" nodes
 * are also present. This approach is easier for manipulations at runtime than using multiple types of nodes.
 *
 * A unified node is capable of updating its internal structure based on:
 * 1. Number of children (fifo if =0 or fairness/priority if >0).
 * 2. Priorities of its children (for subtree structure).
 * 3. `SchedulingSettings` associated with the unified node (for throttler and semaphore constraints).
 *
 * In general, a unified node has an "internal" subtree with the following structure:
 *
 *                            THIS             <-- UnifiedSchedulerNode object
 *                              |
 *                          THROTTLER          <-- [Optional] Throttling scheduling constraint
 *                              |
 *    [If no children]------ SEMAPHORE         <-- [Optional] Semaphore constraint
 *           |                  |
 *         FIFO             PRIORITY           <-- [Optional] Scheduling policy distinguishing priorities
 *                        .-------'  '-------.
 *                   FAIRNESS[p1]   ...   FAIRNESS[pN]     <-- [Optional] Policies for fairness if priorities are equal
 *                    /       \            /       \
 *       CHILD[p1,w1] ... CHILD[p1,wM]  CHILD[pN,w1] ... CHILD[pN,wM]   <-- Unified children (UnifiedSchedulerNode objects)
 *
 * NOTE: to distinguish different kinds of children we use the following terms:
 *  - immediate child: child of unified object (THROTTLER);
 *  - unified child: leaf of this "internal" subtree (CHILD[p,w]);
 *  - intermediate node: any child that is not UnifiedSchedulerNode (unified child or `this`)
 */
class UnifiedSchedulerNode final : public ISchedulerNode
{
private:
    /// Helper function for managing a parent of a node
    static void reparent(const SchedulerNodePtr & node, const SchedulerNodePtr & new_parent)
    {
        reparent(node, new_parent.get());
    }

    /// Helper function for managing a parent of a node
    static void reparent(const SchedulerNodePtr & node, ISchedulerNode * new_parent)
    {
        chassert(node);
        chassert(new_parent);
        if (new_parent == node->parent)
            return;
        if (node->parent)
            node->parent->removeChild(node.get());
        new_parent->attachChild(node);
    }

    /// Helper function for managing a parent of a node
    static void detach(const SchedulerNodePtr & node)
    {
        if (node->parent)
            node->parent->removeChild(node.get());
    }

    /// A branch of the tree for a specific priority value
    struct FairnessBranch
    {
        SchedulerNodePtr root; /// FairPolicy node is used if multiple children with the same priority are attached
        std::unordered_map<String, UnifiedSchedulerNodePtr> children; // basename -> child

        bool empty() const { return children.empty(); }

        SchedulerNodePtr getRoot()
        {
            chassert(!children.empty());
            if (root)
                return root;
            chassert(children.size() == 1);
            return children.begin()->second;
        }

        /// Attaches a new child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            if (auto [it, inserted] = children.emplace(child->basename, child); !inserted)
                throw Exception(
                    ErrorCodes::INVALID_SCHEDULER_NODE,
                    "Can't add another child with the same path: {}",
                    it->second->getPath());

            if (children.size() == 2)
            {
                // Insert a fair node if we have just added the second child
                chassert(!root);
                root = std::make_shared<FairPolicy>(event_queue_, SchedulerNodeInfo{});
                root->info.setPriority(child->info.priority);
                root->basename = fmt::format("p{}_fair", child->info.priority.value);
                for (auto & [_, node] : children)
                    reparent(node, root);
                return root; // New root has been created
            }
            else if (children.size() == 1)
                return child; // We have added a single child so far and it is the new root
            else
                reparent(child, root);
            return {}; // Root is the same
        }

        /// Detaches a child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        /// NOTE: It could also return null if `empty()` after detaching
        [[nodiscard]] SchedulerNodePtr detachUnifiedChild(EventQueue *, const UnifiedSchedulerNodePtr & child)
        {
            auto it = children.find(child->basename);
            if (it == children.end())
                return {}; // unknown child

            detach(child);
            children.erase(it);
            if (children.size() == 1)
            {
                // Remove the fair node if only one child is left
                chassert(root);
                detach(root);
                root.reset();
                return children.begin()->second; // The last child is a new root now
            }
            else if (children.empty())
                return {}; // We have detached the last child
            else
                return {}; // Root is the same (two or more children are left)
        }
    };
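
    // Walk-through of the return-value protocol above (illustrative, spelled out for clarity):
    // attaching the first child returns that child as the new branch root; attaching the second
    // child creates the "p<priority>_fair" FairPolicy node and returns it; any further attach
    // returns null, because the branch root stays the same.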

    /// Handles all the child nodes with intermediate fair and/or priority nodes
    struct ChildrenBranch
    {
        SchedulerNodePtr root; /// PriorityPolicy node is used if multiple children with different priorities are attached
        std::unordered_map<Priority::Value, FairnessBranch> branches; /// Branches for different priority values

        // Returns true iff there are no unified children attached
        bool empty() const { return branches.empty(); }

        SchedulerNodePtr getRoot()
        {
            chassert(!branches.empty());
            if (root)
                return root;
            return branches.begin()->second.getRoot(); // There should be exactly one child-branch
        }

        /// Attaches a new child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            auto [it, new_branch] = branches.try_emplace(child->info.priority);
            auto & child_branch = it->second;
            auto branch_root = child_branch.attachUnifiedChild(event_queue_, child);
            if (!new_branch)
            {
                if (branch_root)
                {
                    if (root)
                        reparent(branch_root, root);
                    else
                        return branch_root;
                }
                return {};
            }
            else
            {
                chassert(branch_root);
                if (branches.size() == 2)
                {
                    // Insert a priority node if we have just added the second branch
                    chassert(!root);
                    root = std::make_shared<PriorityPolicy>(event_queue_, SchedulerNodeInfo{});
                    root->basename = "prio";
                    for (auto & [_, branch] : branches)
                        reparent(branch.getRoot(), root);
                    return root; // New root has been created
                }
                else if (branches.size() == 1)
                    return child; // We have added a single child so far and it is the new root
                else
                    reparent(child, root);
                return {}; // Root is the same
            }
        }

        /// Detaches a child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        /// NOTE: It could also return null if `empty()` after detaching
        [[nodiscard]] SchedulerNodePtr detachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            auto it = branches.find(child->info.priority);
            if (it == branches.end())
                return {}; // unknown child

            auto & child_branch = it->second;
            auto branch_root = child_branch.detachUnifiedChild(event_queue_, child);
            if (child_branch.empty())
            {
                branches.erase(it);
                if (branches.size() == 1)
                {
                    // Remove the priority node if only one child-branch is left
                    chassert(root);
                    detach(root);
                    root.reset();
                    return branches.begin()->second.getRoot(); // The last child-branch is a new root now
                }
                else if (branches.empty())
                    return {}; // We have detached the last child
                else
                    return {}; // Root is the same (two or more child-branches are left)
            }
            if (branch_root)
            {
                if (root)
                    reparent(branch_root, root);
                else
                    return branch_root;
            }
            return {}; // Root is the same
        }
    };

    /// Handles the degenerate case of zero children (a fifo queue) or delegates to `ChildrenBranch`.
    struct QueueOrChildrenBranch
    {
        SchedulerNodePtr queue; /// FifoQueue node is used if there are no children
        ChildrenBranch branch; /// Used if there is at least one child

        SchedulerNodePtr getRoot()
        {
            if (queue)
                return queue;
            else
                return branch.getRoot();
        }

        // Should be called after the constructor, before any other methods
        [[nodiscard]] SchedulerNodePtr initialize(EventQueue * event_queue_)
        {
            createQueue(event_queue_);
            return queue;
        }

        /// Attaches a new child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            if (queue)
                removeQueue();
            return branch.attachUnifiedChild(event_queue_, child);
        }

        /// Detaches a child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr detachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            if (queue)
                return {}; // No-op, it already has no children
            auto branch_root = branch.detachUnifiedChild(event_queue_, child);
            if (branch.empty())
            {
                createQueue(event_queue_);
                return queue;
            }
            return branch_root;
        }

    private:
        void createQueue(EventQueue * event_queue_)
        {
            queue = std::make_shared<FifoQueue>(event_queue_, SchedulerNodeInfo{});
            queue->basename = "fifo";
        }

        void removeQueue()
        {
            // This unified node will not be able to process resource requests any longer
            // All remaining resource requests are aborted on queue destruction
            detach(queue);
            std::static_pointer_cast<ISchedulerQueue>(queue)->purgeQueue();
            queue.reset();
        }
    };

    /// Handles all the nodes under this unified node
    /// Specifically handles constraints with `QueueOrChildrenBranch` under it
    struct ConstraintsBranch
    {
        SchedulerNodePtr throttler;
        SchedulerNodePtr semaphore;
        QueueOrChildrenBranch branch;
        SchedulingSettings settings;

        // Should be called after the constructor, before any other methods
        [[nodiscard]] SchedulerNodePtr initialize(EventQueue * event_queue_, const SchedulingSettings & settings_)
        {
            settings = settings_;
            SchedulerNodePtr node = branch.initialize(event_queue_);
            if (settings.hasSemaphore())
            {
                semaphore = std::make_shared<SemaphoreConstraint>(event_queue_, SchedulerNodeInfo{}, settings.max_requests, settings.max_cost);
                semaphore->basename = "semaphore";
                reparent(node, semaphore);
                node = semaphore;
            }
            if (settings.hasThrottler())
            {
                throttler = std::make_shared<ThrottlerConstraint>(event_queue_, SchedulerNodeInfo{}, settings.max_speed, settings.max_burst);
                throttler->basename = "throttler";
                reparent(node, throttler);
                node = throttler;
            }
            return node;
        }

        /// Attaches a new child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            if (auto branch_root = branch.attachUnifiedChild(event_queue_, child))
            {
                // If both the semaphore and the throttler exist we should reparent to the farthest from the root
                if (semaphore)
                    reparent(branch_root, semaphore);
                else if (throttler)
                    reparent(branch_root, throttler);
                else
                    return branch_root;
            }
            return {};
        }

        /// Detaches a child.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr detachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
        {
            if (auto branch_root = branch.detachUnifiedChild(event_queue_, child))
            {
                if (semaphore)
                    reparent(branch_root, semaphore);
                else if (throttler)
                    reparent(branch_root, throttler);
                else
                    return branch_root;
            }
            return {};
        }

        /// Updates constraint-related nodes.
        /// Returns the root node if it has been changed to a different node, otherwise returns null.
        [[nodiscard]] SchedulerNodePtr updateSchedulingSettings(EventQueue * event_queue_, const SchedulingSettings & new_settings)
        {
            SchedulerNodePtr node = branch.getRoot();

            if (!settings.hasSemaphore() && new_settings.hasSemaphore()) // Add semaphore
            {
                semaphore = std::make_shared<SemaphoreConstraint>(event_queue_, SchedulerNodeInfo{}, new_settings.max_requests, new_settings.max_cost);
                semaphore->basename = "semaphore";
                reparent(node, semaphore);
                node = semaphore;
            }
            else if (settings.hasSemaphore() && !new_settings.hasSemaphore()) // Remove semaphore
            {
                detach(semaphore);
                semaphore.reset();
            }
            else if (settings.hasSemaphore() && new_settings.hasSemaphore()) // Update semaphore
            {
                static_cast<SemaphoreConstraint &>(*semaphore).updateConstraints(semaphore, new_settings.max_requests, new_settings.max_cost);
                node = semaphore;
            }

            if (!settings.hasThrottler() && new_settings.hasThrottler()) // Add throttler
            {
                throttler = std::make_shared<ThrottlerConstraint>(event_queue_, SchedulerNodeInfo{}, new_settings.max_speed, new_settings.max_burst);
                throttler->basename = "throttler";
                reparent(node, throttler);
                node = throttler;
            }
            else if (settings.hasThrottler() && !new_settings.hasThrottler()) // Remove throttler
            {
                detach(throttler);
                throttler.reset();
            }
            else if (settings.hasThrottler() && new_settings.hasThrottler()) // Update throttler
            {
                static_cast<ThrottlerConstraint &>(*throttler).updateConstraints(new_settings.max_speed, new_settings.max_burst);
                node = throttler;
            }

            settings = new_settings;
            return node;
        }
    };

public:
    explicit UnifiedSchedulerNode(EventQueue * event_queue_, const SchedulingSettings & settings)
        : ISchedulerNode(event_queue_, SchedulerNodeInfo(settings.weight, settings.priority))
    {
        immediate_child = impl.initialize(event_queue, settings);
        reparent(immediate_child, this);
    }

    ~UnifiedSchedulerNode() override
    {
        // We need to clear `parent` in the child to avoid dangling references
        if (immediate_child)
            removeChild(immediate_child.get());
    }

    /// Attaches a unified child as a leaf of the internal subtree and inserts or updates all the intermediate nodes
    /// NOTE: Do not confuse with `attachChild()` which is used only for immediate children
    void attachUnifiedChild(const UnifiedSchedulerNodePtr & child)
    {
        if (auto new_child = impl.attachUnifiedChild(event_queue, child))
            reparent(new_child, this);
    }

    /// Detaches a unified child and updates all the intermediate nodes.
    /// A detached child could be safely attached to another parent.
    /// NOTE: Do not confuse with `removeChild()` which is used only for immediate children
    void detachUnifiedChild(const UnifiedSchedulerNodePtr & child)
    {
        if (auto new_child = impl.detachUnifiedChild(event_queue, child))
            reparent(new_child, this);
    }

    static bool updateRequiresDetach(const String & old_parent, const String & new_parent, const SchedulingSettings & old_settings, const SchedulingSettings & new_settings)
    {
        return old_parent != new_parent || old_settings.priority != new_settings.priority;
    }

    /// Updates scheduling settings. The set of constraints might change.
    /// NOTE: The caller is responsible for detaching and attaching if `updateRequiresDetach` returns true
    void updateSchedulingSettings(const SchedulingSettings & new_settings)
    {
        info.setPriority(new_settings.priority);
        info.setWeight(new_settings.weight);
        if (auto new_child = impl.updateSchedulingSettings(event_queue, new_settings))
            reparent(new_child, this);
    }

    const SchedulingSettings & getSettings() const
    {
        return impl.settings;
    }

    /// Returns the queue to be used for resource requests or `nullptr` if it has unified children
    std::shared_ptr<ISchedulerQueue> getQueue() const
    {
        return static_pointer_cast<ISchedulerQueue>(impl.branch.queue);
    }

    /// Collects nodes that could be accessed with raw pointers by resource requests (queue and constraints)
    /// NOTE: This is a building block for the classifier. Note that due to possible movement of a queue, the set of constraints
    /// for that queue might change in the future, and `request->constraints` might reference nodes not in
    /// the initial set of nodes returned by `addRawPointerNodes()`. To avoid destruction of such additional nodes
    /// the classifier must (indirectly) hold nodes returned by `addRawPointerNodes()` for all future versions of
    /// all unified nodes. Such version control is done by `IOResourceManager`.
    void addRawPointerNodes(std::vector<SchedulerNodePtr> & nodes)
    {
        // NOTE: `impl.throttler` could be skipped, because ThrottlerConstraint does not call `request->addConstraint()`
        if (impl.semaphore)
            nodes.push_back(impl.semaphore);
        if (impl.branch.queue)
            nodes.push_back(impl.branch.queue);
        for (auto & [_, branch] : impl.branch.branch.branches)
        {
            for (auto & [_, child] : branch.children)
                child->addRawPointerNodes(nodes);
        }
    }

    bool hasUnifiedChildren() const
    {
        return impl.branch.queue == nullptr;
    }

    /// Introspection. Calls a visitor for self and every internal node. Does not recurse into unified children.
    void forEachSchedulerNode(std::function<void(ISchedulerNode *)> visitor)
    {
        visitor(this);
        if (impl.throttler)
            visitor(impl.throttler.get());
        if (impl.semaphore)
            visitor(impl.semaphore.get());
        if (impl.branch.queue)
            visitor(impl.branch.queue.get());
        if (impl.branch.branch.root) // priority
            visitor(impl.branch.branch.root.get());
        for (auto & [_, branch] : impl.branch.branch.branches)
        {
            if (branch.root) // fairness
                visitor(branch.root.get());
        }
    }

protected: // Hide all the ISchedulerNode interface methods as implementation details
    const String & getTypeName() const override
    {
        static String type_name("unified");
        return type_name;
    }

    bool equals(ISchedulerNode *) override
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "UnifiedSchedulerNode should not be used with CustomResourceManager");
    }

    /// Attaches an immediate child (used through `reparent()`)
    void attachChild(const SchedulerNodePtr & child_) override
    {
        immediate_child = child_;
        immediate_child->setParent(this);

        // Activate if required
        if (immediate_child->isActive())
            activateChild(immediate_child.get());
    }

    /// Removes an immediate child (used through `reparent()`)
    void removeChild(ISchedulerNode * child) override
    {
        if (immediate_child.get() == child)
        {
            child_active = false; // deactivate
            immediate_child->setParent(nullptr); // detach
            immediate_child.reset();
        }
    }

    ISchedulerNode * getChild(const String & child_name) override
    {
        if (immediate_child->basename == child_name)
            return immediate_child.get();
        else
            return nullptr;
    }

    std::pair<ResourceRequest *, bool> dequeueRequest() override
    {
        auto [request, child_now_active] = immediate_child->dequeueRequest();
        if (!request)
            return {nullptr, false};

        child_active = child_now_active;
        if (!child_active)
            busy_periods++;
        incrementDequeued(request->cost);
        return {request, child_active};
    }

    bool isActive() override
    {
        return child_active;
    }

    /// Shows the number of immediate active children (for introspection)
    size_t activeChildren() override
    {
        return child_active;
    }

    /// Activates an immediate child
    void activateChild(ISchedulerNode * child) override
    {
        if (child == immediate_child.get())
            if (!std::exchange(child_active, true) && parent)
                parent->activateChild(this);
    }

private:
    ConstraintsBranch impl;
    SchedulerNodePtr immediate_child; // An immediate child (actually the root of the whole subtree)
    bool child_active = false;
};

}
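
// Illustrative sketch (not part of this header): building the hierarchy from the comment above
// by hand; this mirrors what IOResourceManager does for every resource. Names are made up.
//
//     EventQueue event_queue;
//     auto all = std::make_shared<UnifiedSchedulerNode>(&event_queue, SchedulingSettings{});
//     all->basename = "all";
//     auto production = std::make_shared<UnifiedSchedulerNode>(&event_queue, SchedulingSettings{});
//     production->basename = "production";
//     all->attachUnifiedChild(production); // inserts internal fair/priority nodes as needed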
@ -1,15 +0,0 @@
#include <Common/Scheduler/Nodes/registerResourceManagers.h>
#include <Common/Scheduler/ResourceManagerFactory.h>

namespace DB
{

void registerDynamicResourceManager(ResourceManagerFactory &);

void registerResourceManagers()
{
    auto & factory = ResourceManagerFactory::instance();
    registerDynamicResourceManager(factory);
}

}
@ -1,8 +0,0 @@
#pragma once

namespace DB
{

void registerResourceManagers();

}
@ -1,5 +1,8 @@
#pragma once

#include <gtest/gtest.h>

#include <Common/Scheduler/SchedulingSettings.h>
#include <Common/Scheduler/IResourceManager.h>
#include <Common/Scheduler/SchedulerRoot.h>
#include <Common/Scheduler/ResourceGuard.h>
@ -7,26 +10,35 @@
#include <Common/Scheduler/Nodes/PriorityPolicy.h>
#include <Common/Scheduler/Nodes/FifoQueue.h>
#include <Common/Scheduler/Nodes/SemaphoreConstraint.h>
#include <Common/Scheduler/Nodes/UnifiedSchedulerNode.h>
#include <Common/Scheduler/Nodes/registerSchedulerNodes.h>
#include <Common/Scheduler/Nodes/registerResourceManagers.h>

#include <Poco/Util/XMLConfiguration.h>

#include <atomic>
#include <barrier>
#include <exception>
#include <functional>
#include <memory>
#include <unordered_map>
#include <mutex>
#include <set>
#include <sstream>
#include <utility>

namespace DB
{

namespace ErrorCodes
{
    extern const int RESOURCE_ACCESS_DENIED;
}

struct ResourceTestBase
{
    ResourceTestBase()
    {
        [[maybe_unused]] static bool typesRegistered = [] { registerSchedulerNodes(); registerResourceManagers(); return true; }();
        [[maybe_unused]] static bool typesRegistered = [] { registerSchedulerNodes(); return true; }();
    }

    template <class TClass>
@ -37,10 +49,16 @@ struct ResourceTestBase
        Poco::AutoPtr config{new Poco::Util::XMLConfiguration(stream)};
        String config_prefix = "node";

        return add<TClass>(event_queue, root_node, path, std::ref(*config), config_prefix);
    }

    template <class TClass, class... Args>
    static TClass * add(EventQueue * event_queue, SchedulerNodePtr & root_node, const String & path, Args... args)
    {
        if (path == "/")
        {
            EXPECT_TRUE(root_node.get() == nullptr);
            root_node.reset(new TClass(event_queue, *config, config_prefix));
            root_node.reset(new TClass(event_queue, std::forward<Args>(args)...));
            return static_cast<TClass *>(root_node.get());
        }

@ -65,73 +83,114 @@ struct ResourceTestBase
        }

        EXPECT_TRUE(!child_name.empty()); // wrong path
        SchedulerNodePtr node = std::make_shared<TClass>(event_queue, *config, config_prefix);
        SchedulerNodePtr node = std::make_shared<TClass>(event_queue, std::forward<Args>(args)...);
        node->basename = child_name;
        parent->attachChild(node);
        return static_cast<TClass *>(node.get());
    }
};


struct ConstraintTest : public SemaphoreConstraint
{
    explicit ConstraintTest(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
        : SemaphoreConstraint(event_queue_, config, config_prefix)
    {}

    std::pair<ResourceRequest *, bool> dequeueRequest() override
    {
        auto [request, active] = SemaphoreConstraint::dequeueRequest();
        if (request)
        {
            std::unique_lock lock(mutex);
            requests.insert(request);
        }
        return {request, active};
    }

    void finishRequest(ResourceRequest * request) override
    {
        {
            std::unique_lock lock(mutex);
            requests.erase(request);
        }
        SemaphoreConstraint::finishRequest(request);
    }

    std::mutex mutex;
    std::set<ResourceRequest *> requests;
};

class ResourceTestClass : public ResourceTestBase
{
    struct Request : public ResourceRequest
    {
        ResourceTestClass * test;
        String name;

        Request(ResourceCost cost_, const String & name_)
        Request(ResourceTestClass * test_, ResourceCost cost_, const String & name_)
            : ResourceRequest(cost_)
            , test(test_)
            , name(name_)
        {}

        void execute() override
        {
        }

        void failed(const std::exception_ptr &) override
        {
            test->failed_cost += cost;
            delete this;
        }
    };

public:
    ~ResourceTestClass()
    {
        if (root_node)
            dequeue(); // Just to avoid any leaks of `Request` objects
    }

    template <class TClass>
    void add(const String & path, const String & xml = {})
    {
        ResourceTestBase::add<TClass>(&event_queue, root_node, path, xml);
    }

    template <class TClass, class... Args>
    void addCustom(const String & path, Args... args)
    {
        ResourceTestBase::add<TClass>(&event_queue, root_node, path, std::forward<Args>(args)...);
    }

    UnifiedSchedulerNodePtr createUnifiedNode(const String & basename, const SchedulingSettings & settings = {})
    {
        return createUnifiedNode(basename, {}, settings);
    }

    UnifiedSchedulerNodePtr createUnifiedNode(const String & basename, const UnifiedSchedulerNodePtr & parent, const SchedulingSettings & settings = {})
    {
        auto node = std::make_shared<UnifiedSchedulerNode>(&event_queue, settings);
        node->basename = basename;
        if (parent)
        {
            parent->attachUnifiedChild(node);
        }
        else
        {
            EXPECT_TRUE(root_node.get() == nullptr);
            root_node = node;
        }
        return node;
    }
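
    // Usage sketch (illustrative): building the documented three-workload hierarchy in a test:
    //
    //     auto all = createUnifiedNode("all");
    //     auto production = createUnifiedNode("production", all);
    //     auto development = createUnifiedNode("development", all);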

    // Updates the parent and/or scheduling settings for a specified `node`.
    // Unit test implementation must make sure that all needed queues and constraints are not going to be destroyed.
    // Normally it is the responsibility of IOResourceManager, but we do not use it here, so manual version control is required.
    // (see IOResourceManager::Resource::updateCurrentVersion() for details)
    void updateUnifiedNode(const UnifiedSchedulerNodePtr & node, const UnifiedSchedulerNodePtr & old_parent, const UnifiedSchedulerNodePtr & new_parent, const SchedulingSettings & new_settings)
    {
        EXPECT_TRUE((old_parent && new_parent) || (!old_parent && !new_parent)); // changing the root node is not supported
        bool detached = false;
        if (UnifiedSchedulerNode::updateRequiresDetach(
            old_parent ? old_parent->basename : "",
            new_parent ? new_parent->basename : "",
            node->getSettings(),
            new_settings))
        {
            if (old_parent)
                old_parent->detachUnifiedChild(node);
            detached = true;
        }

        node->updateSchedulingSettings(new_settings);

        if (detached && new_parent)
            new_parent->attachUnifiedChild(node);
    }


    void enqueue(const UnifiedSchedulerNodePtr & node, const std::vector<ResourceCost> & costs)
    {
        enqueueImpl(node->getQueue().get(), costs, node->basename);
    }

    void enqueue(const String & path, const std::vector<ResourceCost> & costs)
    {
        ASSERT_TRUE(root_node.get() != nullptr); // root should be initialized first
        ISchedulerNode * node = root_node.get();
        size_t pos = 1;
        while (pos < path.length())
        while (node && pos < path.length())
        {
            size_t slash = path.find('/', pos);
            if (slash != String::npos)
@ -146,13 +205,17 @@ public:
|
||||
pos = String::npos;
|
||||
}
|
||||
}
|
||||
ISchedulerQueue * queue = dynamic_cast<ISchedulerQueue *>(node);
|
||||
ASSERT_TRUE(queue != nullptr); // not a queue
|
||||
if (node)
|
||||
enqueueImpl(dynamic_cast<ISchedulerQueue *>(node), costs);
|
||||
}
|
||||
|
||||
void enqueueImpl(ISchedulerQueue * queue, const std::vector<ResourceCost> & costs, const String & name = {})
|
||||
{
|
||||
ASSERT_TRUE(queue != nullptr); // not a queue
|
||||
if (!queue)
|
||||
return; // to make clang-analyzer-core.NonNullParamChecker happy
|
||||
for (ResourceCost cost : costs)
|
||||
{
|
||||
queue->enqueueRequest(new Request(cost, queue->basename));
|
||||
}
|
||||
queue->enqueueRequest(new Request(this, cost, name.empty() ? queue->basename : name));
|
||||
processEvents(); // to activate queues
|
||||
}
|
||||
|
||||
@ -208,6 +271,12 @@ public:
        consumed_cost[name] -= value;
    }

    void failed(ResourceCost value)
    {
        EXPECT_EQ(failed_cost, value);
        failed_cost -= value;
    }

    void processEvents()
    {
        while (event_queue.tryProcess()) {}
@ -217,8 +286,11 @@ private:
    EventQueue event_queue;
    SchedulerNodePtr root_node;
    std::unordered_map<String, ResourceCost> consumed_cost;
    ResourceCost failed_cost = 0;
};

enum EnqueueOnlyEnum { EnqueueOnly };

template <class TManager>
struct ResourceTestManager : public ResourceTestBase
{
@ -230,16 +302,49 @@ struct ResourceTestManager : public ResourceTestBase
    struct Guard : public ResourceGuard
    {
        ResourceTestManager & t;
        ResourceCost cost;

        Guard(ResourceTestManager & t_, ResourceLink link_, ResourceCost cost)
            : ResourceGuard(ResourceGuard::Metrics::getIOWrite(), link_, cost, Lock::Defer)
        /// Works like regular ResourceGuard, ready for consumption after constructor
        Guard(ResourceTestManager & t_, ResourceLink link_, ResourceCost cost_)
            : ResourceGuard(ResourceGuard::Metrics::getIOWrite(), link_, cost_, Lock::Defer)
            , t(t_)
            , cost(cost_)
        {
            t.onEnqueue(link);
            waitExecute();
        }

        /// Just enqueue resource request, do not block (needed for tests to sync). Call `waitExecute()` afterwards
        Guard(ResourceTestManager & t_, ResourceLink link_, ResourceCost cost_, EnqueueOnlyEnum)
            : ResourceGuard(ResourceGuard::Metrics::getIOWrite(), link_, cost_, Lock::Defer)
            , t(t_)
            , cost(cost_)
        {
            t.onEnqueue(link);
        }

        /// Waits for ResourceRequest::execute() to be called for enqueued request
        void waitExecute()
        {
            lock();
            t.onExecute(link);
            consume(cost);
        }

        /// Waits for ResourceRequest::failed() to be called for enqueued request
        void waitFailed(const String & pattern)
        {
            try
            {
                lock();
                FAIL();
            }
            catch (Exception & e)
            {
                ASSERT_EQ(e.code(), ErrorCodes::RESOURCE_ACCESS_DENIED);
                ASSERT_TRUE(e.message().contains(pattern));
            }
        }
    };
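The two-phase form above is what the queue-destruction tests at the end of this diff rely on. A condensed sketch of the pattern (our summary of how these helpers combine, not new API):

```cpp
// Enqueue without blocking, let another thread restructure the hierarchy,
// then observe the failure instead of the execution.
TestGuard g(t, link, 1, EnqueueOnly);      // request enqueued, constructor returns immediately
// ... another thread destroys the queue, e.g. by attaching a unified child ...
g.waitFailed("is about to be destructed"); // lock() rethrows as RESOURCE_ACCESS_DENIED
```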

    struct TItem
@ -264,10 +369,24 @@ struct ResourceTestManager : public ResourceTestBase
        , busy_period(thread_count)
    {}

    enum DoNotInitManagerEnum { DoNotInitManager };

    explicit ResourceTestManager(size_t thread_count, DoNotInitManagerEnum)
        : busy_period(thread_count)
    {}

    ~ResourceTestManager()
    {
        wait();
    }

    void wait()
    {
        for (auto & thread : threads)
            thread.join();
        {
            if (thread.joinable())
                thread.join();
        }
    }

    void update(const String & xml)

@ -2,15 +2,15 @@

#include <Common/Scheduler/Nodes/tests/ResourceTest.h>

#include <Common/Scheduler/Nodes/DynamicResourceManager.h>
#include <Common/Scheduler/Nodes/CustomResourceManager.h>
#include <Poco/Util/XMLConfiguration.h>

using namespace DB;

using ResourceTest = ResourceTestManager<DynamicResourceManager>;
using ResourceTest = ResourceTestManager<CustomResourceManager>;
using TestGuard = ResourceTest::Guard;

TEST(SchedulerDynamicResourceManager, Smoke)
TEST(SchedulerCustomResourceManager, Smoke)
{
    ResourceTest t;

@ -31,25 +31,25 @@ TEST(SchedulerDynamicResourceManager, Smoke)
    </clickhouse>
    )CONFIG");

    ClassifierPtr cA = t.manager->acquire("A");
    ClassifierPtr cB = t.manager->acquire("B");
    ClassifierPtr c_a = t.manager->acquire("A");
    ClassifierPtr c_b = t.manager->acquire("B");

    for (int i = 0; i < 10; i++)
    {
        ResourceGuard gA(ResourceGuard::Metrics::getIOWrite(), cA->get("res1"), 1, ResourceGuard::Lock::Defer);
        gA.lock();
        gA.consume(1);
        gA.unlock();
        ResourceGuard g_a(ResourceGuard::Metrics::getIOWrite(), c_a->get("res1"), 1, ResourceGuard::Lock::Defer);
        g_a.lock();
        g_a.consume(1);
        g_a.unlock();

        ResourceGuard gB(ResourceGuard::Metrics::getIOWrite(), cB->get("res1"));
        gB.unlock();
        ResourceGuard g_b(ResourceGuard::Metrics::getIOWrite(), c_b->get("res1"));
        g_b.unlock();

        ResourceGuard gC(ResourceGuard::Metrics::getIORead(), cB->get("res1"));
        gB.consume(2);
        ResourceGuard g_c(ResourceGuard::Metrics::getIORead(), c_b->get("res1"));
        g_b.consume(2);
    }
}

TEST(SchedulerDynamicResourceManager, Fairness)
TEST(SchedulerCustomResourceManager, Fairness)
{
    // Total cost for A and B cannot differ by more than 1 (every request has cost equal to 1).
    // Requests from A use `value = 1` and requests from B use `value = -1`.
@ -13,6 +13,12 @@ public:
        , log(log_)
    {}

    const String & getTypeName() const override
    {
        static String type_name("fake");
        return type_name;
    }

    void attachChild(const SchedulerNodePtr & child) override
    {
        log += " +" + child->basename;

335
src/Common/Scheduler/Nodes/tests/gtest_io_resource_manager.cpp
Normal file
@ -0,0 +1,335 @@
#include <gtest/gtest.h>

#include <Core/Defines.h>
#include <Core/Settings.h>

#include <Common/Scheduler/Nodes/tests/ResourceTest.h>
#include <Common/Scheduler/Workload/WorkloadEntityStorageBase.h>
#include <Common/Scheduler/Nodes/IOResourceManager.h>

#include <Interpreters/Context.h>

#include <Parsers/parseQuery.h>
#include <Parsers/ASTCreateWorkloadQuery.h>
#include <Parsers/ASTCreateResourceQuery.h>
#include <Parsers/ASTDropWorkloadQuery.h>
#include <Parsers/ASTDropResourceQuery.h>
#include <Parsers/ParserCreateWorkloadQuery.h>
#include <Parsers/ParserCreateResourceQuery.h>
#include <Parsers/ParserDropWorkloadQuery.h>
#include <Parsers/ParserDropResourceQuery.h>

using namespace DB;

class WorkloadEntityTestStorage : public WorkloadEntityStorageBase
{
public:
    WorkloadEntityTestStorage()
        : WorkloadEntityStorageBase(Context::getGlobalContextInstance())
    {}

    void loadEntities() override {}

    void executeQuery(const String & query)
    {
        ParserCreateWorkloadQuery create_workload_p;
        ParserDropWorkloadQuery drop_workload_p;
        ParserCreateResourceQuery create_resource_p;
        ParserDropResourceQuery drop_resource_p;

        auto parse = [&] (IParser & parser)
        {
            String error;
            const char * end = query.data();
            return tryParseQuery(
                parser,
                end,
                query.data() + query.size(),
                error,
                false,
                "",
                false,
                0,
                DBMS_DEFAULT_MAX_PARSER_DEPTH,
                DBMS_DEFAULT_MAX_PARSER_BACKTRACKS,
                true);
        };

        if (ASTPtr create_workload = parse(create_workload_p))
        {
            auto & parsed = create_workload->as<ASTCreateWorkloadQuery &>();
            auto workload_name = parsed.getWorkloadName();
            bool throw_if_exists = !parsed.if_not_exists && !parsed.or_replace;
            bool replace_if_exists = parsed.or_replace;

            storeEntity(
                nullptr,
                WorkloadEntityType::Workload,
                workload_name,
                create_workload,
                throw_if_exists,
                replace_if_exists,
                {});
        }
        else if (ASTPtr create_resource = parse(create_resource_p))
        {
            auto & parsed = create_resource->as<ASTCreateResourceQuery &>();
            auto resource_name = parsed.getResourceName();
            bool throw_if_exists = !parsed.if_not_exists && !parsed.or_replace;
            bool replace_if_exists = parsed.or_replace;

            storeEntity(
                nullptr,
                WorkloadEntityType::Resource,
                resource_name,
                create_resource,
                throw_if_exists,
                replace_if_exists,
                {});
        }
        else if (ASTPtr drop_workload = parse(drop_workload_p))
        {
            auto & parsed = drop_workload->as<ASTDropWorkloadQuery &>();
            bool throw_if_not_exists = !parsed.if_exists;
            removeEntity(
                nullptr,
                WorkloadEntityType::Workload,
                parsed.workload_name,
                throw_if_not_exists);
        }
        else if (ASTPtr drop_resource = parse(drop_resource_p))
        {
            auto & parsed = drop_resource->as<ASTDropResourceQuery &>();
            bool throw_if_not_exists = !parsed.if_exists;
            removeEntity(
                nullptr,
                WorkloadEntityType::Resource,
                parsed.resource_name,
                throw_if_not_exists);
        }
        else
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid query in WorkloadEntityTestStorage: {}", query);
    }

private:
    WorkloadEntityStorageBase::OperationResult storeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        ASTPtr create_entity_query,
        bool throw_if_exists,
        bool replace_if_exists,
        const Settings & settings) override
    {
        UNUSED(current_context, entity_type, entity_name, create_entity_query, throw_if_exists, replace_if_exists, settings);
        return OperationResult::Ok;
    }

    WorkloadEntityStorageBase::OperationResult removeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        bool throw_if_not_exists) override
    {
        UNUSED(current_context, entity_type, entity_name, throw_if_not_exists);
        return OperationResult::Ok;
    }
};

struct ResourceTest : ResourceTestManager<IOResourceManager>
{
    WorkloadEntityTestStorage storage;

    explicit ResourceTest(size_t thread_count = 1)
        : ResourceTestManager(thread_count, DoNotInitManager)
    {
        manager = std::make_shared<IOResourceManager>(storage);
    }

    void query(const String & query_str)
    {
        storage.executeQuery(query_str);
    }

    template <class Func>
    void async(const String & workload, Func func)
    {
        threads.emplace_back([=, this, func2 = std::move(func)]
        {
            ClassifierPtr classifier = manager->acquire(workload);
            func2(classifier);
        });
    }

    template <class Func>
    void async(const String & workload, const String & resource, Func func)
    {
        threads.emplace_back([=, this, func2 = std::move(func)]
        {
            ClassifierPtr classifier = manager->acquire(workload);
            ResourceLink link = classifier->get(resource);
            func2(link);
        });
    }
};

using TestGuard = ResourceTest::Guard;

TEST(SchedulerIOResourceManager, Smoke)
{
    ResourceTest t;

    t.query("CREATE RESOURCE res1 (WRITE DISK disk, READ DISK disk)");
    t.query("CREATE WORKLOAD all SETTINGS max_requests = 10");
    t.query("CREATE WORKLOAD A in all");
    t.query("CREATE WORKLOAD B in all SETTINGS weight = 3");

    ClassifierPtr c_a = t.manager->acquire("A");
    ClassifierPtr c_b = t.manager->acquire("B");

    for (int i = 0; i < 10; i++)
    {
        ResourceGuard g_a(ResourceGuard::Metrics::getIOWrite(), c_a->get("res1"), 1, ResourceGuard::Lock::Defer);
        g_a.lock();
        g_a.consume(1);
        g_a.unlock();

        ResourceGuard g_b(ResourceGuard::Metrics::getIOWrite(), c_b->get("res1"));
        g_b.unlock();

        ResourceGuard g_c(ResourceGuard::Metrics::getIORead(), c_b->get("res1"));
        g_b.consume(2);
    }
}

TEST(SchedulerIOResourceManager, Fairness)
{
    // Total cost for A and B cannot differ by more than 1 (every request has cost equal to 1).
    // Requests from A use `value = 1` and requests from B use `value = -1`.
    std::atomic<Int64> unfairness = 0;
    auto fairness_diff = [&] (Int64 value)
    {
        Int64 cur_unfairness = unfairness.fetch_add(value, std::memory_order_relaxed) + value;
        EXPECT_NEAR(cur_unfairness, 0, 1);
    };

    constexpr size_t threads_per_queue = 2;
    int requests_per_thread = 100;
    ResourceTest t(2 * threads_per_queue + 1);

    t.query("CREATE RESOURCE res1 (WRITE DISK disk, READ DISK disk)");
    t.query("CREATE WORKLOAD all SETTINGS max_requests = 1");
    t.query("CREATE WORKLOAD A IN all");
    t.query("CREATE WORKLOAD B IN all");
    t.query("CREATE WORKLOAD leader IN all");

    for (int thread = 0; thread < threads_per_queue; thread++)
    {
        t.threads.emplace_back([&]
        {
            ClassifierPtr c = t.manager->acquire("A");
            ResourceLink link = c->get("res1");
            t.startBusyPeriod(link, 1, requests_per_thread);
            for (int request = 0; request < requests_per_thread; request++)
            {
                TestGuard g(t, link, 1);
                fairness_diff(1);
            }
        });
    }

    for (int thread = 0; thread < threads_per_queue; thread++)
    {
        t.threads.emplace_back([&]
        {
            ClassifierPtr c = t.manager->acquire("B");
            ResourceLink link = c->get("res1");
            t.startBusyPeriod(link, 1, requests_per_thread);
            for (int request = 0; request < requests_per_thread; request++)
            {
                TestGuard g(t, link, 1);
                fairness_diff(-1);
            }
        });
    }

    ClassifierPtr c = t.manager->acquire("leader");
    ResourceLink link = c->get("res1");
    t.blockResource(link);

    t.wait(); // Wait for threads to finish before destructing locals
}

TEST(SchedulerIOResourceManager, DropNotEmptyQueue)
{
    ResourceTest t;

    t.query("CREATE RESOURCE res1 (WRITE DISK disk, READ DISK disk)");
    t.query("CREATE WORKLOAD all SETTINGS max_requests = 1");
    t.query("CREATE WORKLOAD intermediate IN all");

    std::barrier sync_before_enqueue(2);
    std::barrier sync_before_drop(3);
    std::barrier sync_after_drop(2);
    t.async("intermediate", "res1", [&] (ResourceLink link)
    {
        TestGuard g(t, link, 1);
        sync_before_enqueue.arrive_and_wait();
        sync_before_drop.arrive_and_wait(); // 1st resource request is consuming
        sync_after_drop.arrive_and_wait(); // 1st resource request is still consuming
    });

    sync_before_enqueue.arrive_and_wait(); // to maintain correct order of resource requests

    t.async("intermediate", "res1", [&] (ResourceLink link)
    {
        TestGuard g(t, link, 1, EnqueueOnly);
        sync_before_drop.arrive_and_wait(); // 2nd resource request is enqueued
        g.waitFailed("is about to be destructed");
    });

    sync_before_drop.arrive_and_wait(); // main thread triggers FifoQueue destruction by adding a unified child
    t.query("CREATE WORKLOAD leaf IN intermediate");
    sync_after_drop.arrive_and_wait();

    t.wait(); // Wait for threads to finish before destructing locals
}

TEST(SchedulerIOResourceManager, DropNotEmptyQueueLong)
{
    ResourceTest t;

    t.query("CREATE RESOURCE res1 (WRITE DISK disk, READ DISK disk)");
    t.query("CREATE WORKLOAD all SETTINGS max_requests = 1");
    t.query("CREATE WORKLOAD intermediate IN all");

    static constexpr int queue_size = 100;
    std::barrier sync_before_enqueue(2);
    std::barrier sync_before_drop(2 + queue_size);
    std::barrier sync_after_drop(2);
    t.async("intermediate", "res1", [&] (ResourceLink link)
    {
        TestGuard g(t, link, 1);
        sync_before_enqueue.arrive_and_wait();
        sync_before_drop.arrive_and_wait(); // 1st resource request is consuming
        sync_after_drop.arrive_and_wait(); // 1st resource request is still consuming
    });

    sync_before_enqueue.arrive_and_wait(); // to maintain correct order of resource requests

    for (int i = 0; i < queue_size; i++)
    {
        t.async("intermediate", "res1", [&] (ResourceLink link)
        {
            TestGuard g(t, link, 1, EnqueueOnly);
            sync_before_drop.arrive_and_wait(); // many resource requests are enqueued
            g.waitFailed("is about to be destructed");
        });
    }

    sync_before_drop.arrive_and_wait(); // main thread triggers FifoQueue destruction by adding a unified child
    t.query("CREATE WORKLOAD leaf IN intermediate");
    sync_after_drop.arrive_and_wait();

    t.wait(); // Wait for threads to finish before destructing locals
}
@ -8,18 +8,17 @@ using namespace DB;

using ResourceTest = ResourceTestClass;

/// Tests disabled because of leaks in the test themselves: https://github.com/ClickHouse/ClickHouse/issues/67678

TEST(DISABLED_SchedulerFairPolicy, Factory)
TEST(SchedulerFairPolicy, Factory)
{
    ResourceTest t;

    Poco::AutoPtr cfg = new Poco::Util::XMLConfiguration();
    SchedulerNodePtr fair = SchedulerNodeFactory::instance().get("fair", /* event_queue = */ nullptr, *cfg, "");
    EventQueue event_queue;
    SchedulerNodePtr fair = SchedulerNodeFactory::instance().get("fair", &event_queue, *cfg, "");
    EXPECT_TRUE(dynamic_cast<FairPolicy *>(fair.get()) != nullptr);
}

TEST(DISABLED_SchedulerFairPolicy, FairnessWeights)
TEST(SchedulerFairPolicy, FairnessWeights)
{
    ResourceTest t;

@ -43,7 +42,7 @@ TEST(DISABLED_SchedulerFairPolicy, FairnessWeights)
    t.consumed("B", 20);
}

TEST(DISABLED_SchedulerFairPolicy, Activation)
TEST(SchedulerFairPolicy, Activation)
{
    ResourceTest t;

@ -79,7 +78,7 @@ TEST(DISABLED_SchedulerFairPolicy, Activation)
    t.consumed("B", 10);
}

TEST(DISABLED_SchedulerFairPolicy, FairnessMaxMin)
TEST(SchedulerFairPolicy, FairnessMaxMin)
{
    ResourceTest t;

@ -103,7 +102,7 @@ TEST(DISABLED_SchedulerFairPolicy, FairnessMaxMin)
    t.consumed("A", 20);
}

TEST(DISABLED_SchedulerFairPolicy, HierarchicalFairness)
TEST(SchedulerFairPolicy, HierarchicalFairness)
{
    ResourceTest t;

@ -8,18 +8,17 @@ using namespace DB;

using ResourceTest = ResourceTestClass;

/// Tests disabled because of leaks in the test themselves: https://github.com/ClickHouse/ClickHouse/issues/67678

TEST(DISABLED_SchedulerPriorityPolicy, Factory)
TEST(SchedulerPriorityPolicy, Factory)
{
    ResourceTest t;

    Poco::AutoPtr cfg = new Poco::Util::XMLConfiguration();
    SchedulerNodePtr prio = SchedulerNodeFactory::instance().get("priority", /* event_queue = */ nullptr, *cfg, "");
    EventQueue event_queue;
    SchedulerNodePtr prio = SchedulerNodeFactory::instance().get("priority", &event_queue, *cfg, "");
    EXPECT_TRUE(dynamic_cast<PriorityPolicy *>(prio.get()) != nullptr);
}

TEST(DISABLED_SchedulerPriorityPolicy, Priorities)
TEST(SchedulerPriorityPolicy, Priorities)
{
    ResourceTest t;

@ -53,7 +52,7 @@ TEST(DISABLED_SchedulerPriorityPolicy, Priorities)
    t.consumed("C", 0);
}

TEST(DISABLED_SchedulerPriorityPolicy, Activation)
TEST(SchedulerPriorityPolicy, Activation)
{
    ResourceTest t;

@ -94,7 +93,7 @@ TEST(DISABLED_SchedulerPriorityPolicy, Activation)
    t.consumed("C", 0);
}

TEST(DISABLED_SchedulerPriorityPolicy, SinglePriority)
TEST(SchedulerPriorityPolicy, SinglePriority)
{
    ResourceTest t;

@ -1,5 +1,6 @@
#include <gtest/gtest.h>

#include <Common/Scheduler/Nodes/SemaphoreConstraint.h>
#include <Common/Scheduler/Nodes/tests/ResourceTest.h>

#include <Common/Scheduler/SchedulerRoot.h>
@ -101,6 +102,11 @@ struct MyRequest : public ResourceRequest
        if (on_execute)
            on_execute();
    }

    void failed(const std::exception_ptr &) override
    {
        FAIL();
    }
};

TEST(SchedulerRoot, Smoke)
@ -108,14 +114,14 @@ TEST(SchedulerRoot, Smoke)
    ResourceTest t;

    ResourceHolder r1(t);
    auto * fc1 = r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
    auto * fc1 = r1.add<SemaphoreConstraint>("/", "<max_requests>1</max_requests>");
    r1.add<PriorityPolicy>("/prio");
    auto a = r1.addQueue("/prio/A", "<priority>1</priority>");
    auto b = r1.addQueue("/prio/B", "<priority>2</priority>");
    r1.registerResource();

    ResourceHolder r2(t);
    auto * fc2 = r2.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
    auto * fc2 = r2.add<SemaphoreConstraint>("/", "<max_requests>1</max_requests>");
    r2.add<PriorityPolicy>("/prio");
    auto c = r2.addQueue("/prio/C", "<priority>-1</priority>");
    auto d = r2.addQueue("/prio/D", "<priority>-2</priority>");
@ -123,25 +129,25 @@ TEST(SchedulerRoot, Smoke)

    {
        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), a);
        EXPECT_TRUE(fc1->requests.contains(&rg.request));
        EXPECT_TRUE(fc1->getInflights().first == 1);
        rg.consume(1);
    }

    {
        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), b);
        EXPECT_TRUE(fc1->requests.contains(&rg.request));
        EXPECT_TRUE(fc1->getInflights().first == 1);
        rg.consume(1);
    }

    {
        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), c);
        EXPECT_TRUE(fc2->requests.contains(&rg.request));
        EXPECT_TRUE(fc2->getInflights().first == 1);
        rg.consume(1);
    }

    {
        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), d);
        EXPECT_TRUE(fc2->requests.contains(&rg.request));
        EXPECT_TRUE(fc2->getInflights().first == 1);
        rg.consume(1);
    }
}
@ -151,7 +157,7 @@ TEST(SchedulerRoot, Budget)
    ResourceTest t;

    ResourceHolder r1(t);
    r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
    r1.add<SemaphoreConstraint>("/", "<max_requests>1</max_requests>");
    r1.add<PriorityPolicy>("/prio");
    auto a = r1.addQueue("/prio/A", "");
    r1.registerResource();
@ -176,7 +182,7 @@ TEST(SchedulerRoot, Cancel)
    ResourceTest t;

    ResourceHolder r1(t);
    auto * fc1 = r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
    auto * fc1 = r1.add<SemaphoreConstraint>("/", "<max_requests>1</max_requests>");
    r1.add<PriorityPolicy>("/prio");
    auto a = r1.addQueue("/prio/A", "<priority>1</priority>");
    auto b = r1.addQueue("/prio/B", "<priority>2</priority>");
@ -189,7 +195,7 @@ TEST(SchedulerRoot, Cancel)
    MyRequest request(1, [&]
    {
        sync.arrive_and_wait(); // (A)
        EXPECT_TRUE(fc1->requests.contains(&request));
        EXPECT_TRUE(fc1->getInflights().first == 1);
        sync.arrive_and_wait(); // (B)
        request.finish();
        destruct_sync.arrive_and_wait(); // (C)
@ -214,5 +220,5 @@ TEST(SchedulerRoot, Cancel)
    consumer1.join();
    consumer2.join();

    EXPECT_TRUE(fc1->requests.empty());
    EXPECT_TRUE(fc1->getInflights().first == 0);
}

@ -10,9 +10,7 @@ using namespace DB;

using ResourceTest = ResourceTestClass;

/// Tests disabled because of leaks in the test themselves: https://github.com/ClickHouse/ClickHouse/issues/67678

TEST(DISABLED_SchedulerThrottlerConstraint, LeakyBucketConstraint)
TEST(SchedulerThrottlerConstraint, LeakyBucketConstraint)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -42,7 +40,7 @@ TEST(DISABLED_SchedulerThrottlerConstraint, LeakyBucketConstraint)
    t.consumed("A", 10);
}

TEST(DISABLED_SchedulerThrottlerConstraint, Unlimited)
TEST(SchedulerThrottlerConstraint, Unlimited)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -59,7 +57,7 @@ TEST(DISABLED_SchedulerThrottlerConstraint, Unlimited)
    }
}

TEST(DISABLED_SchedulerThrottlerConstraint, Pacing)
TEST(SchedulerThrottlerConstraint, Pacing)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -79,7 +77,7 @@ TEST(DISABLED_SchedulerThrottlerConstraint, Pacing)
    }
}

TEST(DISABLED_SchedulerThrottlerConstraint, BucketFilling)
TEST(SchedulerThrottlerConstraint, BucketFilling)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -113,7 +111,7 @@ TEST(DISABLED_SchedulerThrottlerConstraint, BucketFilling)
    t.consumed("A", 3);
}

TEST(DISABLED_SchedulerThrottlerConstraint, PeekAndAvgLimits)
TEST(SchedulerThrottlerConstraint, PeekAndAvgLimits)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -141,7 +139,7 @@ TEST(DISABLED_SchedulerThrottlerConstraint, PeekAndAvgLimits)
    }
}

TEST(DISABLED_SchedulerThrottlerConstraint, ThrottlerAndFairness)
TEST(SchedulerThrottlerConstraint, ThrottlerAndFairness)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -160,22 +158,22 @@ TEST(DISABLED_SchedulerThrottlerConstraint, ThrottlerAndFairness)
        t.enqueue("/fair/B", {req_cost});
    }

|
||||
double shareB = 0.9;
|
||||
double share_a = 0.1;
|
||||
double share_b = 0.9;
|
||||
|
||||
// Bandwidth-latency coupling due to fairness: worst latency is inversely proportional to share
|
||||
auto max_latencyA = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / shareA));
|
||||
auto max_latencyB = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / shareB));
|
||||
auto max_latency_a = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / share_a));
|
||||
auto max_latency_b = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / share_b));
|
||||
|
||||
double consumedA = 0;
|
||||
double consumedB = 0;
|
||||
double consumed_a = 0;
|
||||
double consumed_b = 0;
|
||||
for (int seconds = 0; seconds < 100; seconds++)
|
||||
{
|
||||
t.process(start + std::chrono::seconds(seconds));
|
||||
double arrival_curve = 100.0 + 10.0 * seconds + req_cost;
|
||||
t.consumed("A", static_cast<ResourceCost>(arrival_curve * shareA - consumedA), max_latencyA);
|
||||
t.consumed("B", static_cast<ResourceCost>(arrival_curve * shareB - consumedB), max_latencyB);
|
||||
consumedA = arrival_curve * shareA;
|
||||
consumedB = arrival_curve * shareB;
|
||||
t.consumed("A", static_cast<ResourceCost>(arrival_curve * share_a - consumed_a), max_latency_a);
|
||||
t.consumed("B", static_cast<ResourceCost>(arrival_curve * share_b - consumed_b), max_latency_b);
|
||||
consumed_a = arrival_curve * share_a;
|
||||
consumed_b = arrival_curve * share_b;
|
||||
}
|
||||
}
|
||||
|
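The latency tolerance used in both ThrottlerAndFairness tests follows from the comment above it: under fair sharing, a class holding fraction `share` of the bandwidth can lag behind its ideal consumption curve, and the worst-case lag is inversely proportional to that share. In formula form (our paraphrase of the expression in the test, not an independent derivation):

$$ \text{max\_latency} = \text{req\_cost} \cdot \left(1 + \frac{1}{\text{share}}\right) $$

With `req_cost = 1` this yields a tolerance of $1 \cdot (1 + 1/0.1) = 11$ for A and, after the integer cast, $\lfloor 1 \cdot (1 + 1/0.9) \rfloor = 2$ for B.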
@ -0,0 +1,748 @@
#include <chrono>
#include <gtest/gtest.h>

#include <Common/Scheduler/ResourceGuard.h>
#include <Common/Scheduler/ResourceLink.h>
#include <Common/Scheduler/Nodes/tests/ResourceTest.h>

#include <Common/Priority.h>
#include <Common/Scheduler/Nodes/FairPolicy.h>
#include <Common/Scheduler/Nodes/UnifiedSchedulerNode.h>

using namespace DB;

using ResourceTest = ResourceTestClass;

TEST(SchedulerUnifiedNode, Smoke)
{
    ResourceTest t;

    t.addCustom<UnifiedSchedulerNode>("/", SchedulingSettings{});

    t.enqueue("/fifo", {10, 10});
    t.dequeue(2);
    t.consumed("fifo", 20);
}

TEST(SchedulerUnifiedNode, FairnessWeight)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{}});
    auto b = t.createUnifiedNode("B", all, {.weight = 3.0, .priority = Priority{}});

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});

    t.dequeue(4);
    t.consumed("A", 10);
    t.consumed("B", 30);

    t.dequeue(4);
    t.consumed("A", 10);
    t.consumed("B", 30);

    t.dequeue();
    t.consumed("A", 60);
    t.consumed("B", 20);
}

TEST(SchedulerUnifiedNode, FairnessActivation)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all);
    auto b = t.createUnifiedNode("B", all);
    auto c = t.createUnifiedNode("C", all);

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10});
    t.enqueue(c, {10, 10});

    t.dequeue(3);
    t.consumed("A", 10);
    t.consumed("B", 10);
    t.consumed("C", 10);

    t.dequeue(4);
    t.consumed("A", 30);
    t.consumed("B", 0);
    t.consumed("C", 10);

    t.enqueue(b, {10, 10});
    t.dequeue(1);
    t.consumed("B", 10);

    t.enqueue(c, {10, 10});
    t.dequeue(1);
    t.consumed("C", 10);

    t.dequeue(2); // A B or B A
    t.consumed("A", 10);
    t.consumed("B", 10);
}

TEST(SchedulerUnifiedNode, FairnessMaxMin)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all);
    auto b = t.createUnifiedNode("B", all);

    t.enqueue(a, {10, 10}); // make sure A is never empty

    for (int i = 0; i < 10; i++)
    {
        t.enqueue(a, {10, 10, 10, 10});
        t.enqueue(b, {10, 10});

        t.dequeue(6);
        t.consumed("A", 40);
        t.consumed("B", 20);
    }

    t.dequeue(2);
    t.consumed("A", 20);
}

TEST(SchedulerUnifiedNode, FairnessHierarchical)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto x = t.createUnifiedNode("X", all);
    auto y = t.createUnifiedNode("Y", all);
    auto a = t.createUnifiedNode("A", x);
    auto b = t.createUnifiedNode("B", x);
    auto c = t.createUnifiedNode("C", y);
    auto d = t.createUnifiedNode("D", y);

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    for (int i = 0; i < 4; i++)
    {
        t.dequeue(8);
        t.consumed("A", 20);
        t.consumed("B", 20);
        t.consumed("C", 20);
        t.consumed("D", 20);
    }

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    for (int i = 0; i < 4; i++)
    {
        t.dequeue(8);
        t.consumed("A", 40);
        t.consumed("C", 20);
        t.consumed("D", 20);
    }

    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    for (int i = 0; i < 4; i++)
    {
        t.dequeue(8);
        t.consumed("B", 40);
        t.consumed("C", 20);
        t.consumed("D", 20);
    }

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
    for (int i = 0; i < 4; i++)
    {
        t.dequeue(8);
        t.consumed("A", 20);
        t.consumed("B", 20);
        t.consumed("C", 40);
    }

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    for (int i = 0; i < 4; i++)
    {
        t.dequeue(8);
        t.consumed("A", 20);
        t.consumed("B", 20);
        t.consumed("D", 40);
    }

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
    for (int i = 0; i < 4; i++)
    {
        t.dequeue(8);
        t.consumed("A", 40);
        t.consumed("D", 40);
    }
}

TEST(SchedulerUnifiedNode, Priority)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.priority = Priority{3}});
    auto b = t.createUnifiedNode("B", all, {.priority = Priority{2}});
    auto c = t.createUnifiedNode("C", all, {.priority = Priority{1}});

    t.enqueue(a, {10, 10, 10});
    t.enqueue(b, {10, 10, 10});
    t.enqueue(c, {10, 10, 10});

    t.dequeue(2);
    t.consumed("A", 0);
    t.consumed("B", 0);
    t.consumed("C", 20);

    t.dequeue(2);
    t.consumed("A", 0);
    t.consumed("B", 10);
    t.consumed("C", 10);

    t.dequeue(2);
    t.consumed("A", 0);
    t.consumed("B", 20);
    t.consumed("C", 0);

    t.dequeue();
    t.consumed("A", 30);
    t.consumed("B", 0);
    t.consumed("C", 0);
}

TEST(SchedulerUnifiedNode, PriorityActivation)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.priority = Priority{3}});
    auto b = t.createUnifiedNode("B", all, {.priority = Priority{2}});
    auto c = t.createUnifiedNode("C", all, {.priority = Priority{1}});

    t.enqueue(a, {10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10});
    t.enqueue(c, {10, 10});

    t.dequeue(3);
    t.consumed("A", 0);
    t.consumed("B", 10);
    t.consumed("C", 20);

    t.dequeue(2);
    t.consumed("A", 20);
    t.consumed("B", 0);
    t.consumed("C", 0);

    t.enqueue(b, {10, 10, 10});
    t.dequeue(2);
    t.consumed("A", 0);
    t.consumed("B", 20);
    t.consumed("C", 0);

    t.enqueue(c, {10, 10});
    t.dequeue(3);
    t.consumed("A", 0);
    t.consumed("B", 10);
    t.consumed("C", 20);

    t.dequeue(2);
    t.consumed("A", 20);
    t.consumed("B", 0);
    t.consumed("C", 0);
}

TEST(SchedulerUnifiedNode, List)
{
    ResourceTest t;

    std::list<UnifiedSchedulerNodePtr> list;
    list.push_back(t.createUnifiedNode("all"));

    for (int length = 1; length < 5; length++)
    {
        String name = fmt::format("L{}", length);
        list.push_back(t.createUnifiedNode(name, list.back()));

        for (int i = 0; i < 3; i++)
        {
            t.enqueue(list.back(), {10, 10});
            t.dequeue(1);
            t.consumed(name, 10);

            for (int j = 0; j < 3; j++)
            {
                t.enqueue(list.back(), {10, 10, 10});
                t.dequeue(1);
                t.consumed(name, 10);
                t.dequeue(1);
                t.consumed(name, 10);
                t.dequeue(1);
                t.consumed(name, 10);
            }

            t.dequeue(1);
            t.consumed(name, 10);
        }
    }
}

TEST(SchedulerUnifiedNode, ThrottlerLeakyBucket)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
    t.process(start, 0);

    auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 20.0});

    t.enqueue(all, {10, 10, 10, 10, 10, 10, 10, 10});

    t.process(start + std::chrono::seconds(0));
    t.consumed("all", 30); // It is allowed to go below zero for exactly one resource request

    t.process(start + std::chrono::seconds(1));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(2));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(3));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(4));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(100500));
    t.consumed("all", 10);
}

TEST(SchedulerUnifiedNode, ThrottlerPacing)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
    t.process(start, 0);

    // Zero burst allows you to send one request of any `size` and then throttle for `size/max_speed` seconds.
    // Useful if outgoing traffic should be "paced", i.e. have the least possible burstiness.
    auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 1.0, .max_burst = 0.0});

    t.enqueue(all, {1, 2, 3, 1, 2, 1});
    int output[] = {1, 2, 0, 3, 0, 0, 1, 2, 0, 1, 0};
    for (int i = 0; i < std::size(output); i++)
    {
        t.process(start + std::chrono::seconds(i));
        t.consumed("all", output[i]);
    }
}

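The expected `output` sequence above can be reproduced with a few lines of token-bucket arithmetic. The following standalone sketch is our own model of the pacing behavior (tokens are refilled at `max_speed`, capped at `max_burst = 0`, and a request is admitted only while the balance is non-negative, so the balance may go below zero for at most one request); it is not the scheduler's actual implementation:

```cpp
#include <algorithm>
#include <deque>
#include <iostream>

int main()
{
    std::deque<int> queue = {1, 2, 3, 1, 2, 1}; // request costs
    double tokens = 0.0;                        // bucket starts full: max_burst = 0
    const double max_speed = 1.0;               // tokens per second
    for (int second = 0; second <= 10; ++second)
    {
        if (second > 0)
            tokens = std::min(0.0, tokens + max_speed); // refill, capped by max_burst = 0
        int consumed = 0;
        while (!queue.empty() && tokens >= 0.0) // admit while balance is non-negative
        {
            consumed += queue.front();
            tokens -= queue.front(); // may drop below zero for one request
            queue.pop_front();
        }
        std::cout << consumed << ' '; // prints: 1 2 0 3 0 0 1 2 0 1 0
    }
    std::cout << '\n';
}
```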
TEST(SchedulerUnifiedNode, ThrottlerBucketFilling)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
    t.process(start, 0);

    auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 100.0});

    t.enqueue(all, {100});

    t.process(start + std::chrono::seconds(0));
    t.consumed("all", 100); // consume all tokens, but it is still active (not negative)

    t.process(start + std::chrono::seconds(5));
    t.consumed("all", 0); // There was nothing to consume

    t.enqueue(all, {10, 10, 10, 10, 10, 10, 10, 10, 10, 10});
    t.process(start + std::chrono::seconds(5));
    t.consumed("all", 60); // 5 sec * 10 tokens/sec = 50 tokens + 1 extra request to go below zero

    t.process(start + std::chrono::seconds(100));
    t.consumed("all", 40); // Consume rest

    t.process(start + std::chrono::seconds(200));

    t.enqueue(all, {95, 1, 1, 1, 1, 1, 1, 1, 1, 1});
    t.process(start + std::chrono::seconds(200));
    t.consumed("all", 101); // check we cannot consume more than max_burst + 1 request

    t.process(start + std::chrono::seconds(100500));
    t.consumed("all", 3);
}

TEST(SchedulerUnifiedNode, ThrottlerAndFairness)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
    t.process(start, 0);

    auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 100.0});
    auto a = t.createUnifiedNode("A", all, {.weight = 10.0, .priority = Priority{}});
    auto b = t.createUnifiedNode("B", all, {.weight = 90.0, .priority = Priority{}});

    ResourceCost req_cost = 1;
    ResourceCost total_cost = 2000;
    for (int i = 0; i < total_cost / req_cost; i++)
    {
        t.enqueue(a, {req_cost});
        t.enqueue(b, {req_cost});
    }

    double share_a = 0.1;
    double share_b = 0.9;

    // Bandwidth-latency coupling due to fairness: worst latency is inversely proportional to share
    auto max_latency_a = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / share_a));
    auto max_latency_b = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / share_b));

    double consumed_a = 0;
    double consumed_b = 0;
    for (int seconds = 0; seconds < 100; seconds++)
    {
        t.process(start + std::chrono::seconds(seconds));
        double arrival_curve = 100.0 + 10.0 * seconds + req_cost;
        t.consumed("A", static_cast<ResourceCost>(arrival_curve * share_a - consumed_a), max_latency_a);
        t.consumed("B", static_cast<ResourceCost>(arrival_curve * share_b - consumed_b), max_latency_b);
        consumed_a = arrival_curve * share_a;
        consumed_b = arrival_curve * share_b;
    }
}

TEST(SchedulerUnifiedNode, QueueWithRequestsDestruction)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");

    t.enqueue(all, {10, 10}); // enqueue requests to be canceled

    // This will destroy the queue and fail both requests
    auto a = t.createUnifiedNode("A", all);
    t.failed(20);

    // Check that everything works fine after destruction
    auto b = t.createUnifiedNode("B", all);
    t.enqueue(a, {10, 10}); // make sure A is never empty
    for (int i = 0; i < 10; i++)
    {
        t.enqueue(a, {10, 10, 10, 10});
        t.enqueue(b, {10, 10});

        t.dequeue(6);
        t.consumed("A", 40);
        t.consumed("B", 20);
    }
    t.dequeue(2);
    t.consumed("A", 20);
}

TEST(SchedulerUnifiedNode, ResourceGuardException)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");

    t.enqueue(all, {10, 10}); // enqueue requests to be canceled

    std::thread consumer([queue = all->getQueue()]
    {
        ResourceLink link{.queue = queue.get()};
        bool caught = false;
        try
        {
            ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), link);
        }
        catch (...)
        {
            caught = true;
        }
        ASSERT_TRUE(caught);
    });

    // This will destroy the queue and fail both requests
    auto a = t.createUnifiedNode("A", all);
    t.failed(20);
    consumer.join();

    // Check that everything works fine after destruction
    auto b = t.createUnifiedNode("B", all);
    t.enqueue(a, {10, 10}); // make sure A is never empty
    for (int i = 0; i < 10; i++)
    {
        t.enqueue(a, {10, 10, 10, 10});
        t.enqueue(b, {10, 10});

        t.dequeue(6);
        t.consumed("A", 40);
        t.consumed("B", 20);
    }
    t.dequeue(2);
    t.consumed("A", 20);
}

TEST(SchedulerUnifiedNode, UpdateWeight)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{}});
    auto b = t.createUnifiedNode("B", all, {.weight = 3.0, .priority = Priority{}});

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});

    t.dequeue(4);
    t.consumed("A", 10);
    t.consumed("B", 30);

    t.updateUnifiedNode(b, all, all, {.weight = 1.0, .priority = Priority{}});

    t.dequeue(4);
    t.consumed("A", 20);
    t.consumed("B", 20);

    t.dequeue(4);
    t.consumed("A", 20);
    t.consumed("B", 20);
}

TEST(SchedulerUnifiedNode, UpdatePriority)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{}});
    auto b = t.createUnifiedNode("B", all, {.weight = 1.0, .priority = Priority{}});

    t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});

    t.dequeue(2);
    t.consumed("A", 10);
    t.consumed("B", 10);

    t.updateUnifiedNode(a, all, all, {.weight = 1.0, .priority = Priority{-1}});

    t.dequeue(2);
    t.consumed("A", 20);
    t.consumed("B", 0);

    t.updateUnifiedNode(b, all, all, {.weight = 1.0, .priority = Priority{-2}});

    t.dequeue(2);
    t.consumed("A", 0);
    t.consumed("B", 20);

    t.updateUnifiedNode(a, all, all, {.weight = 1.0, .priority = Priority{-2}});

    t.dequeue(2);
    t.consumed("A", 10);
    t.consumed("B", 10);
}

TEST(SchedulerUnifiedNode, UpdateParentOfLeafNode)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{1}});
    auto b = t.createUnifiedNode("B", all, {.weight = 1.0, .priority = Priority{2}});
    auto x = t.createUnifiedNode("X", a, {});
    auto y = t.createUnifiedNode("Y", b, {});

    t.enqueue(x, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(y, {10, 10, 10, 10, 10, 10, 10, 10});

    t.dequeue(2);
    t.consumed("X", 20);
    t.consumed("Y", 0);

    t.updateUnifiedNode(x, a, b, {});

    t.dequeue(2);
    t.consumed("X", 10);
    t.consumed("Y", 10);

    t.updateUnifiedNode(y, b, a, {});

    t.dequeue(2);
    t.consumed("X", 0);
    t.consumed("Y", 20);

    t.updateUnifiedNode(y, a, all, {});
    t.updateUnifiedNode(x, b, all, {});

    t.dequeue(4);
    t.consumed("X", 20);
    t.consumed("Y", 20);
}

TEST(SchedulerUnifiedNode, UpdatePriorityOfIntermediateNode)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{1}});
    auto b = t.createUnifiedNode("B", all, {.weight = 1.0, .priority = Priority{2}});
    auto x1 = t.createUnifiedNode("X1", a, {});
    auto y1 = t.createUnifiedNode("Y1", b, {});
    auto x2 = t.createUnifiedNode("X2", a, {});
    auto y2 = t.createUnifiedNode("Y2", b, {});

    t.enqueue(x1, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(y1, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(x2, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(y2, {10, 10, 10, 10, 10, 10, 10, 10});

    t.dequeue(4);
    t.consumed("X1", 20);
    t.consumed("Y1", 0);
    t.consumed("X2", 20);
    t.consumed("Y2", 0);

    t.updateUnifiedNode(a, all, all, {.weight = 1.0, .priority = Priority{2}});

    t.dequeue(4);
    t.consumed("X1", 10);
    t.consumed("Y1", 10);
    t.consumed("X2", 10);
    t.consumed("Y2", 10);

    t.updateUnifiedNode(b, all, all, {.weight = 1.0, .priority = Priority{1}});

    t.dequeue(4);
    t.consumed("X1", 0);
    t.consumed("Y1", 20);
    t.consumed("X2", 0);
    t.consumed("Y2", 20);
}

TEST(SchedulerUnifiedNode, UpdateParentOfIntermediateNode)
{
    ResourceTest t;

    auto all = t.createUnifiedNode("all");
    auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{1}});
    auto b = t.createUnifiedNode("B", all, {.weight = 1.0, .priority = Priority{2}});
    auto c = t.createUnifiedNode("C", a, {});
    auto d = t.createUnifiedNode("D", b, {});
    auto x1 = t.createUnifiedNode("X1", c, {});
    auto y1 = t.createUnifiedNode("Y1", d, {});
    auto x2 = t.createUnifiedNode("X2", c, {});
    auto y2 = t.createUnifiedNode("Y2", d, {});

    t.enqueue(x1, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(y1, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(x2, {10, 10, 10, 10, 10, 10, 10, 10});
    t.enqueue(y2, {10, 10, 10, 10, 10, 10, 10, 10});

    t.dequeue(4);
    t.consumed("X1", 20);
    t.consumed("Y1", 0);
    t.consumed("X2", 20);
    t.consumed("Y2", 0);

    t.updateUnifiedNode(c, a, b, {});

    t.dequeue(4);
    t.consumed("X1", 10);
    t.consumed("Y1", 10);
    t.consumed("X2", 10);
    t.consumed("Y2", 10);

    t.updateUnifiedNode(d, b, a, {});

    t.dequeue(4);
    t.consumed("X1", 0);
    t.consumed("Y1", 20);
    t.consumed("X2", 0);
    t.consumed("Y2", 20);
}

TEST(SchedulerUnifiedNode, UpdateThrottlerMaxSpeed)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
    t.process(start, 0);

    auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 20.0});

    t.enqueue(all, {10, 10, 10, 10, 10, 10, 10, 10});

    t.process(start + std::chrono::seconds(0));
    t.consumed("all", 30); // It is allowed to go below zero for exactly one resource request

    t.process(start + std::chrono::seconds(1));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(2));
    t.consumed("all", 10);

    t.updateUnifiedNode(all, {}, {}, {.priority = Priority{}, .max_speed = 1.0, .max_burst = 20.0});

    t.process(start + std::chrono::seconds(12));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(22));
    t.consumed("all", 10);

    t.process(start + std::chrono::seconds(100500));
    t.consumed("all", 10);
}

TEST(SchedulerUnifiedNode, UpdateThrottlerMaxBurst)
{
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
    t.process(start, 0);

    auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 100.0});

    t.enqueue(all, {100});

    t.process(start + std::chrono::seconds(0));
    t.consumed("all", 100); // consume all tokens, but it is still active (not negative)

    t.process(start + std::chrono::seconds(2));
    t.consumed("all", 0); // There was nothing to consume
    t.updateUnifiedNode(all, {}, {}, {.priority = Priority{}, .max_speed = 10.0, .max_burst = 30.0});

    t.process(start + std::chrono::seconds(5));
    t.consumed("all", 0); // There was nothing to consume

    t.enqueue(all, {10, 10, 10, 10, 10, 10, 10, 10, 10, 10});
    t.process(start + std::chrono::seconds(5));
    t.consumed("all", 40); // min(30 tokens, 5 sec * 10 tokens/sec) = 30 tokens + 1 extra request to go below zero

    t.updateUnifiedNode(all, {}, {}, {.priority = Priority{}, .max_speed = 10.0, .max_burst = 100.0});

    t.process(start + std::chrono::seconds(100));
    t.consumed("all", 60); // Consume rest

    t.process(start + std::chrono::seconds(150));
    t.updateUnifiedNode(all, {}, {}, {.priority = Priority{}, .max_speed = 100.0, .max_burst = 200.0});

    t.process(start + std::chrono::seconds(200));

    t.enqueue(all, {195, 1, 1, 1, 1, 1, 1, 1, 1, 1});
    t.process(start + std::chrono::seconds(200));
    t.consumed("all", 201); // check we cannot consume more than max_burst + 1 request

    t.process(start + std::chrono::seconds(100500));
    t.consumed("all", 3);
}

@ -12,6 +12,7 @@
#include <Common/CurrentMetrics.h>

#include <condition_variable>
#include <exception>
#include <mutex>


@ -34,6 +35,11 @@ namespace CurrentMetrics
namespace DB
{

namespace ErrorCodes
{
    extern const int RESOURCE_ACCESS_DENIED;
}

/*
 * Scoped resource guard.
 * Waits for resource to be available in constructor and releases resource in destructor
@ -109,12 +115,25 @@ public:
            dequeued_cv.notify_one();
        }

        // This function is executed inside the scheduler thread and wakes the thread that issued this `request`.
        // That thread will throw an exception.
        void failed(const std::exception_ptr & ptr) override
        {
            std::unique_lock lock(mutex);
            chassert(state == Enqueued);
            state = Dequeued;
            exception = ptr;
            dequeued_cv.notify_one();
        }

        void wait()
        {
            CurrentMetrics::Increment scheduled(metrics->scheduled_count);
            auto timer = CurrentThread::getProfileEvents().timer(metrics->wait_microseconds);
            std::unique_lock lock(mutex);
            dequeued_cv.wait(lock, [this] { return state == Dequeued; });
            if (exception)
                throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Resource request failed: {}", getExceptionMessage(exception, /* with_stacktrace = */ false));
        }

        void finish(ResourceCost real_cost_, ResourceLink link_)
@ -151,6 +170,7 @@ public:
        std::mutex mutex;
        std::condition_variable dequeued_cv;
        RequestState state = Finished;
        std::exception_ptr exception;
    };

    /// Creates pending request for resource; blocks while resource is not available (unless `Lock::Defer`)
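Pieced together from the tests earlier in this diff, the consumer-side view of this failure path looks roughly as follows (a sketch of the pattern, not new API): the guard blocks in `wait()`, and once the scheduler invokes `failed()` on the pending request, the waiting thread rethrows it as `RESOURCE_ACCESS_DENIED`.

```cpp
// Sketch: a consumer observing a failed resource request.
ResourceLink link{.queue = queue.get()};
try
{
    ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), link); // blocks in wait()
    // ... consume the resource ...
}
catch (Exception & e)
{
    // Thrown by wait() after the scheduler called failed() on the request,
    // e.g. when the queue is destroyed while the request is still enqueued.
    chassert(e.code() == ErrorCodes::RESOURCE_ACCESS_DENIED);
}
```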
@ -1,55 +0,0 @@
#pragma once

#include <Common/ErrorCodes.h>
#include <Common/Exception.h>

#include <Common/Scheduler/IResourceManager.h>

#include <boost/noncopyable.hpp>

#include <memory>
#include <mutex>
#include <unordered_map>

namespace DB
{

namespace ErrorCodes
{
    extern const int INVALID_SCHEDULER_NODE;
}

class ResourceManagerFactory : private boost::noncopyable
{
public:
    static ResourceManagerFactory & instance()
    {
        static ResourceManagerFactory ret;
        return ret;
    }

    ResourceManagerPtr get(const String & name)
    {
        std::lock_guard lock{mutex};
        if (auto iter = methods.find(name); iter != methods.end())
            return iter->second();
        throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name);
    }

    template <class TDerived>
    void registerMethod(const String & name)
    {
        std::lock_guard lock{mutex};
        methods[name] = [] ()
        {
            return std::make_shared<TDerived>();
        };
    }

private:
    std::mutex mutex;
    using Method = std::function<ResourceManagerPtr()>;
    std::unordered_map<String, Method> methods;
};

}
@@ -1,13 +1,34 @@
#include <Common/Scheduler/ResourceRequest.h>
#include <Common/Scheduler/ISchedulerConstraint.h>

#include <Common/Exception.h>

#include <ranges>

namespace DB
{

void ResourceRequest::finish()
{
    if (constraint)
        constraint->finishRequest(this);
    // Iterate over constraints in reverse order
    for (ISchedulerConstraint * constraint : std::ranges::reverse_view(constraints))
    {
        if (constraint)
            constraint->finishRequest(this);
    }
}

bool ResourceRequest::addConstraint(ISchedulerConstraint * new_constraint)
{
    for (auto & constraint : constraints)
    {
        if (!constraint)
        {
            constraint = new_constraint;
            return true;
        }
    }
    return false;
}

}
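The new `finish()`/`addConstraint()` pair above replaces a single constraint pointer with a fixed-size chain that is filled front-to-back and released in reverse. A self-contained sketch of that slot pattern (the `FixedChain` type is hypothetical, not from the patch):

``` cpp
#include <array>
#include <cstddef>
#include <ranges>

// Slots fill front-to-back as the request passes each constraint and are
// released innermost-first on finish(); a fixed array avoids heap
// allocations in the scheduler thread, at the price of a depth limit.
template <typename T, size_t N>
struct FixedChain
{
    std::array<T *, N> slots{}; // value-initialized to nullptr

    bool add(T * node)
    {
        for (auto & slot : slots)
        {
            if (!slot)
            {
                slot = node;
                return true;
            }
        }
        return false; // chain deeper than N is rejected
    }

    template <typename F>
    void releaseAll(F && release)
    {
        for (T * node : std::views::reverse(slots))
            if (node)
                release(node);
    }
};
```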
@@ -2,7 +2,9 @@

#include <boost/intrusive/list.hpp>
#include <base/types.h>
#include <array>
#include <limits>
#include <exception>

namespace DB
{
@@ -15,6 +17,9 @@ class ISchedulerConstraint;
using ResourceCost = Int64;
constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();

/// Max number of constraints for a request to pass through (depth of constraints chain)
constexpr size_t ResourceMaxConstraints = 8;

/*
 * Request for a resource consumption. The main moving part of the scheduling subsystem.
 * Resource requests processing workflow:
@@ -39,8 +44,7 @@ constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();
 *
 * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest().
 * Returning false means it is too late for request to be canceled. It should be processed in a regular way.
 * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen
 * and step (6) MUST be omitted.
 * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen.
 */
class ResourceRequest : public boost::intrusive::list_base_hook<>
{
@@ -49,9 +53,10 @@ public:
    /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it)
    ResourceCost cost;

    /// Scheduler node to be notified on consumption finish
    /// Auto-filled during request enqueue/dequeue
    ISchedulerConstraint * constraint;
    /// Scheduler nodes to be notified on consumption finish
    /// Auto-filled during request dequeue
    /// Vector is not used to avoid allocations in the scheduler thread
    std::array<ISchedulerConstraint *, ResourceMaxConstraints> constraints;

    explicit ResourceRequest(ResourceCost cost_ = 1)
    {
@@ -62,7 +67,8 @@ public:
    void reset(ResourceCost cost_)
    {
        cost = cost_;
        constraint = nullptr;
        for (auto & constraint : constraints)
            constraint = nullptr;
        // Note that list_base_hook should be reset independently (by intrusive list)
    }

@@ -74,11 +80,18 @@ public:
    /// (e.g. setting an std::promise or creating a job in a thread pool)
    virtual void execute() = 0;

    /// Callback to trigger an error in case resource is unavailable.
    virtual void failed(const std::exception_ptr & ptr) = 0;

    /// Stop resource consumption and notify resource scheduler.
    /// Should be called when resource consumption is finished by consumer.
    /// ResourceRequest should not be destructed or reset before calling to `finish()`.
    /// WARNING: this function MUST not be called if request was canceled.
    /// It is okay to call finish() even for failed and canceled requests (it will be no-op)
    void finish();

    /// Is called from the scheduler thread to fill `constraints` chain
    /// Returns `true` iff constraint was added successfully
    bool addConstraint(ISchedulerConstraint * new_constraint);
};

}
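For orientation, a hypothetical consumer-side subclass of the interface above could look as follows; the two method names come from the header, everything else (including the local stand-in base, used so the snippet is self-contained) is illustrative:

``` cpp
#include <exception>

// Local stand-in mirroring the two pure virtuals shown above.
struct RequestLike
{
    virtual ~RequestLike() = default;
    virtual void execute() = 0;                              // called by the scheduler thread on dequeue
    virtual void failed(const std::exception_ptr & ptr) = 0; // called if the resource is unavailable
};

struct MyRequest : RequestLike
{
    void execute() override
    {
        // e.g. fulfil a promise or enqueue a job in a thread pool,
        // then call finish() once consumption is actually done
    }

    void failed(const std::exception_ptr & /*ptr*/) override
    {
        // e.g. store the exception so the waiting thread can rethrow it
    }
};
```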
@@ -28,27 +28,27 @@ namespace ErrorCodes
 * Resource scheduler root node with a dedicated thread.
 * Immediate children correspond to different resources.
 */
class SchedulerRoot : public ISchedulerNode
class SchedulerRoot final : public ISchedulerNode
{
private:
    struct TResource
    struct Resource
    {
        SchedulerNodePtr root;

        // Intrusive cyclic list of active resources
        TResource * next = nullptr;
        TResource * prev = nullptr;
        Resource * next = nullptr;
        Resource * prev = nullptr;

        explicit TResource(const SchedulerNodePtr & root_)
        explicit Resource(const SchedulerNodePtr & root_)
            : root(root_)
        {
            root->info.parent.ptr = this;
        }

        // Get pointer stored by ctor in info
        static TResource * get(SchedulerNodeInfo & info)
        static Resource * get(SchedulerNodeInfo & info)
        {
            return reinterpret_cast<TResource *>(info.parent.ptr);
            return reinterpret_cast<Resource *>(info.parent.ptr);
        }
    };

@@ -60,6 +60,8 @@ public:
    ~SchedulerRoot() override
    {
        stop();
        while (!children.empty())
            removeChild(children.begin()->first);
    }

    /// Runs separate scheduler thread
@@ -95,6 +97,12 @@ public:
        }
    }

    const String & getTypeName() const override
    {
        static String type_name("scheduler");
        return type_name;
    }

    bool equals(ISchedulerNode * other) override
    {
        if (!ISchedulerNode::equals(other))
@@ -179,16 +187,11 @@ public:

    void activateChild(ISchedulerNode * child) override
    {
        activate(TResource::get(child->info));
    }

    void setParent(ISchedulerNode *) override
    {
        abort(); // scheduler must be the root and this function should not be called
        activate(Resource::get(child->info));
    }

private:
    void activate(TResource * value)
    void activate(Resource * value)
    {
        assert(value->next == nullptr && value->prev == nullptr);
        if (current == nullptr) // No active children
@@ -206,7 +209,7 @@ private:
        }
    }

    void deactivate(TResource * value)
    void deactivate(Resource * value)
    {
        if (value->next == nullptr)
            return; // Already deactivated
@@ -251,8 +254,8 @@ private:
        request->execute();
    }

    TResource * current = nullptr; // round-robin pointer
    std::unordered_map<ISchedulerNode *, TResource> children; // resources by pointer
    Resource * current = nullptr; // round-robin pointer
    std::unordered_map<ISchedulerNode *, Resource> children; // resources by pointer
    std::atomic<bool> stop_flag = false;
    EventQueue events;
    ThreadFromGlobalPool scheduler;
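`activate()`/`deactivate()` above maintain an intrusive cyclic list of active resources so that `current` can advance round-robin over them. A standalone sketch of the same insertion and rotation logic (toy `Node` type, not the actual scheduler):

``` cpp
// Active nodes form a circular doubly-linked list; picking advances the
// round-robin pointer by one hop, so every active node gets a turn.
struct Node
{
    Node * next = nullptr;
    Node * prev = nullptr;
};

struct RoundRobin
{
    Node * current = nullptr;

    void activate(Node * value)
    {
        if (!current) // no active nodes: form a single-element cycle
        {
            value->prev = value->next = value;
            current = value;
        }
        else // insert just before current, i.e. at the end of the round
        {
            value->prev = current->prev;
            value->next = current;
            current->prev->next = value;
            current->prev = value;
        }
    }

    Node * pickNext()
    {
        if (!current)
            return nullptr;
        Node * result = current;
        current = current->next; // advance round-robin pointer
        return result;
    }
};
```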
130 src/Common/Scheduler/SchedulingSettings.cpp Normal file
@@ -0,0 +1,130 @@
#include <limits>
#include <Common/Scheduler/SchedulingSettings.h>
#include <Common/Scheduler/ISchedulerNode.h>
#include <Parsers/ASTSetQuery.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

void SchedulingSettings::updateFromChanges(const ASTCreateWorkloadQuery::SettingsChanges & changes, const String & resource_name)
{
    struct {
        std::optional<Float64> new_weight;
        std::optional<Priority> new_priority;
        std::optional<Float64> new_max_speed;
        std::optional<Float64> new_max_burst;
        std::optional<Int64> new_max_requests;
        std::optional<Int64> new_max_cost;

        static Float64 getNotNegativeFloat64(const String & name, const Field & field)
        {
            {
                UInt64 val;
                if (field.tryGet(val))
                    return static_cast<Float64>(val); // We don't mind slight loss of precision
            }

            {
                Int64 val;
                if (field.tryGet(val))
                {
                    if (val < 0)
                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected negative Int64 value for workload setting '{}'", name);
                    return static_cast<Float64>(val); // We don't mind slight loss of precision
                }
            }

            return field.safeGet<Float64>();
        }

        static Int64 getNotNegativeInt64(const String & name, const Field & field)
        {
            {
                UInt64 val;
                if (field.tryGet(val))
                {
                    // Saturate on overflow
                    if (val > static_cast<UInt64>(std::numeric_limits<Int64>::max()))
                        val = std::numeric_limits<Int64>::max();
                    return static_cast<Int64>(val);
                }
            }

            {
                Int64 val;
                if (field.tryGet(val))
                {
                    if (val < 0)
                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected negative Int64 value for workload setting '{}'", name);
                    return val;
                }
            }

            return field.safeGet<Int64>();
        }

        void read(const String & name, const Field & value)
        {
            if (name == "weight")
                new_weight = getNotNegativeFloat64(name, value);
            else if (name == "priority")
                new_priority = Priority{value.safeGet<Priority::Value>()};
            else if (name == "max_speed")
                new_max_speed = getNotNegativeFloat64(name, value);
            else if (name == "max_burst")
                new_max_burst = getNotNegativeFloat64(name, value);
            else if (name == "max_requests")
                new_max_requests = getNotNegativeInt64(name, value);
            else if (name == "max_cost")
                new_max_cost = getNotNegativeInt64(name, value);
        }
    } regular, specific;

    // Read changed setting values
    for (const auto & [name, value, resource] : changes)
    {
        if (resource.empty())
            regular.read(name, value);
        else if (resource == resource_name)
            specific.read(name, value);
    }

    auto get_value = [] <typename T> (const std::optional<T> & specific_new, const std::optional<T> & regular_new, T & old)
    {
        if (specific_new)
            return *specific_new;
        if (regular_new)
            return *regular_new;
        return old;
    };

    // Validate that we could use values read in a scheduler node
    {
        SchedulerNodeInfo validating_node(
            get_value(specific.new_weight, regular.new_weight, weight),
            get_value(specific.new_priority, regular.new_priority, priority));
    }

    // Commit new values.
    // Previous values are left intentionally for ALTER query to be able to skip not mentioned setting values
    weight = get_value(specific.new_weight, regular.new_weight, weight);
    priority = get_value(specific.new_priority, regular.new_priority, priority);
    if (specific.new_max_speed || regular.new_max_speed)
    {
        max_speed = get_value(specific.new_max_speed, regular.new_max_speed, max_speed);
        // We always set max_burst if max_speed is changed.
        // This is done for users to be able to ignore more advanced max_burst setting and rely only on max_speed
        max_burst = default_burst_seconds * max_speed;
    }
    max_burst = get_value(specific.new_max_burst, regular.new_max_burst, max_burst);
    max_requests = get_value(specific.new_max_requests, regular.new_max_requests, max_requests);
    max_cost = get_value(specific.new_max_cost, regular.new_max_cost, max_cost);
}

}
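The `get_value` lambda above encodes the precedence between `FOR resource` settings, plain settings, and previously stored values. The same rule in isolation (hypothetical numbers):

``` cpp
#include <cassert>
#include <optional>

// A value set with `FOR resource` wins over a plain value, which wins over
// the old stored value - so an update can omit settings it does not touch.
template <typename T>
T pick(const std::optional<T> & specific, const std::optional<T> & regular, T old_value)
{
    if (specific)
        return *specific;
    if (regular)
        return *regular;
    return old_value;
}

int main()
{
    assert(pick<double>({}, 8.0, 1.0) == 8.0);  // plain setting applies
    assert(pick<double>(2.0, 8.0, 1.0) == 2.0); // FOR-resource setting wins
    assert(pick<double>({}, {}, 1.0) == 1.0);   // nothing mentioned: keep old value
}
```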
39 src/Common/Scheduler/SchedulingSettings.h Normal file
@@ -0,0 +1,39 @@
#pragma once

#include <base/types.h>

#include <Common/Priority.h>
#include <Parsers/ASTCreateWorkloadQuery.h>

#include <limits>

namespace DB
{

struct SchedulingSettings
{
    /// Priority and weight among siblings
    Float64 weight = 1.0;
    Priority priority;

    /// Throttling constraints.
    /// Up to 2 independent throttlers: one for average speed and one for peak speed.
    static constexpr Float64 default_burst_seconds = 1.0;
    Float64 max_speed = 0; // Zero means unlimited
    Float64 max_burst = 0; // default is `default_burst_seconds * max_speed`

    /// Limits total number of concurrent resource requests that are allowed to consume
    static constexpr Int64 default_max_requests = std::numeric_limits<Int64>::max();
    Int64 max_requests = default_max_requests;

    /// Limits total cost of concurrent resource requests that are allowed to consume
    static constexpr Int64 default_max_cost = std::numeric_limits<Int64>::max();
    Int64 max_cost = default_max_cost;

    bool hasThrottler() const { return max_speed != 0; }
    bool hasSemaphore() const { return max_requests != default_max_requests || max_cost != default_max_cost; }

    void updateFromChanges(const ASTCreateWorkloadQuery::SettingsChanges & changes, const String & resource_name = {});
};

}
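A quick sanity check of how these defaults interact, assuming the header above is available: setting only `max_speed` also derives `max_burst` as `default_burst_seconds * max_speed` (mirroring the commit logic in SchedulingSettings.cpp), i.e. one second's worth of traffic:

``` cpp
#include <cassert>
#include <Common/Scheduler/SchedulingSettings.h> // the header introduced above

int main()
{
    DB::SchedulingSettings s;
    s.max_speed = 100.0;
    // derived the same way updateFromChanges() does when only max_speed is set
    s.max_burst = DB::SchedulingSettings::default_burst_seconds * s.max_speed;
    assert(s.max_burst == 100.0);
    assert(s.hasThrottler());   // max_speed != 0
    assert(!s.hasSemaphore());  // max_requests/max_cost left at defaults
}
```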
91 src/Common/Scheduler/Workload/IWorkloadEntityStorage.h Normal file
@@ -0,0 +1,91 @@
#pragma once

#include <base/types.h>
#include <base/scope_guard.h>

#include <Interpreters/Context_fwd.h>

#include <Parsers/IAST_fwd.h>


namespace DB
{

class IAST;
struct Settings;

enum class WorkloadEntityType : uint8_t
{
    Workload,
    Resource,

    MAX
};

/// Interface for a storage of workload entities (WORKLOAD and RESOURCE).
class IWorkloadEntityStorage
{
public:
    virtual ~IWorkloadEntityStorage() = default;

    /// Whether this storage can replicate entities to another node.
    virtual bool isReplicated() const { return false; }
    virtual String getReplicationID() const { return ""; }

    /// Loads all entities. Can be called once - if entities are already loaded the function does nothing.
    virtual void loadEntities() = 0;

    /// Get entity by name. If no entity stored with entity_name throws exception.
    virtual ASTPtr get(const String & entity_name) const = 0;

    /// Get entity by name. If no entity stored with entity_name return nullptr.
    virtual ASTPtr tryGet(const String & entity_name) const = 0;

    /// Check if entity with entity_name is stored.
    virtual bool has(const String & entity_name) const = 0;

    /// Get all entity names.
    virtual std::vector<String> getAllEntityNames() const = 0;

    /// Get all entity names of specified type.
    virtual std::vector<String> getAllEntityNames(WorkloadEntityType entity_type) const = 0;

    /// Get all entities.
    virtual std::vector<std::pair<String, ASTPtr>> getAllEntities() const = 0;

    /// Check whether any entity has been stored.
    virtual bool empty() const = 0;

    /// Stops watching.
    virtual void stopWatching() {}

    /// Stores an entity.
    virtual bool storeEntity(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        ASTPtr create_entity_query,
        bool throw_if_exists,
        bool replace_if_exists,
        const Settings & settings) = 0;

    /// Removes an entity.
    virtual bool removeEntity(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        bool throw_if_not_exists) = 0;

    struct Event
    {
        WorkloadEntityType type;
        String name;
        ASTPtr entity; /// new or changed entity, null if removed
    };
    using OnChangedHandler = std::function<void(const std::vector<Event> &)>;

    /// Gets all current entries, passes them through `handler` and subscribes for all later changes.
    virtual scope_guard getAllEntitiesAndSubscribe(const OnChangedHandler & handler) = 0;
};

}
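A hypothetical subscriber against this interface: the handler is first invoked with the current entity set, then with every later change; destroying the returned `scope_guard` unsubscribes. Everything outside the interface names is illustrative:

``` cpp
#include <iostream>
#include <vector>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h> // introduced above

void watchEntities(DB::IWorkloadEntityStorage & storage)
{
    scope_guard subscription = storage.getAllEntitiesAndSubscribe(
        [] (const std::vector<DB::IWorkloadEntityStorage::Event> & events)
        {
            for (const auto & event : events)
            {
                // a null `entity` means the named entity was removed
                std::cout << (event.entity ? "created/changed: " : "removed: ")
                          << event.name << '\n';
            }
        });
    // ... the subscription stays active for the lifetime of `subscription`
}
```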
287 src/Common/Scheduler/Workload/WorkloadEntityDiskStorage.cpp Normal file
@@ -0,0 +1,287 @@
#include <Common/Scheduler/Workload/WorkloadEntityDiskStorage.h>

#include <Common/StringUtils.h>
#include <Common/atomicRename.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>

#include <Core/Settings.h>

#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>

#include <Interpreters/Context.h>

#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ParserCreateWorkloadQuery.h>
#include <Parsers/ParserCreateResourceQuery.h>

#include <Poco/DirectoryIterator.h>
#include <Poco/Logger.h>

#include <filesystem>

namespace fs = std::filesystem;


namespace DB
{

namespace Setting
{
    extern const SettingsUInt64 max_parser_backtracks;
    extern const SettingsUInt64 max_parser_depth;
    extern const SettingsBool fsync_metadata;
}

namespace ErrorCodes
{
    extern const int DIRECTORY_DOESNT_EXIST;
    extern const int BAD_ARGUMENTS;
}


namespace
{
    constexpr std::string_view workload_prefix = "workload_";
    constexpr std::string_view resource_prefix = "resource_";
    constexpr std::string_view sql_suffix = ".sql";

    /// Converts a path to an absolute path and appends a separator to it.
    String makeDirectoryPathCanonical(const String & directory_path)
    {
        auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path);
        if (canonical_directory_path.has_filename())
            canonical_directory_path += std::filesystem::path::preferred_separator;
        return canonical_directory_path;
    }
}

WorkloadEntityDiskStorage::WorkloadEntityDiskStorage(const ContextPtr & global_context_, const String & dir_path_)
    : WorkloadEntityStorageBase(global_context_)
    , dir_path{makeDirectoryPathCanonical(dir_path_)}
{
    log = getLogger("WorkloadEntityDiskStorage");
}


ASTPtr WorkloadEntityDiskStorage::tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name)
{
    return tryLoadEntity(entity_type, entity_name, getFilePath(entity_type, entity_name), /* check_file_exists= */ true);
}


ASTPtr WorkloadEntityDiskStorage::tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name, const String & path, bool check_file_exists)
{
    LOG_DEBUG(log, "Loading workload entity {} from file {}", backQuote(entity_name), path);

    try
    {
        if (check_file_exists && !fs::exists(path))
            return nullptr;

        /// There is .sql file with workload entity creation statement.
        ReadBufferFromFile in(path);

        String entity_create_query;
        readStringUntilEOF(entity_create_query, in);

        auto parse = [&] (auto parser)
        {
            return parseQuery(
                parser,
                entity_create_query.data(),
                entity_create_query.data() + entity_create_query.size(),
                "",
                0,
                global_context->getSettingsRef()[Setting::max_parser_depth],
                global_context->getSettingsRef()[Setting::max_parser_backtracks]);
        };

        switch (entity_type)
        {
            case WorkloadEntityType::Workload: return parse(ParserCreateWorkloadQuery());
            case WorkloadEntityType::Resource: return parse(ParserCreateResourceQuery());
            case WorkloadEntityType::MAX: return nullptr;
        }
    }
    catch (...)
    {
        tryLogCurrentException(log, fmt::format("while loading workload entity {} from path {}", backQuote(entity_name), path));
        return nullptr; /// Failed to load this entity, will ignore it
    }
}


void WorkloadEntityDiskStorage::loadEntities()
{
    if (!entities_loaded)
        loadEntitiesImpl();
}


void WorkloadEntityDiskStorage::loadEntitiesImpl()
{
    LOG_INFO(log, "Loading workload entities from {}", dir_path);

    if (!std::filesystem::exists(dir_path))
    {
        LOG_DEBUG(log, "The directory for workload entities ({}) does not exist: nothing to load", dir_path);
        return;
    }

    std::vector<std::pair<String, ASTPtr>> entities_name_and_queries;

    Poco::DirectoryIterator dir_end;
    for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it)
    {
        if (it->isDirectory())
            continue;

        const String & file_name = it.name();

        if (file_name.starts_with(workload_prefix) && file_name.ends_with(sql_suffix))
        {
            String name = unescapeForFileName(file_name.substr(
                workload_prefix.size(),
                file_name.size() - workload_prefix.size() - sql_suffix.size()));

            if (name.empty())
                continue;

            ASTPtr ast = tryLoadEntity(WorkloadEntityType::Workload, name, dir_path + it.name(), /* check_file_exists= */ false);
            if (ast)
                entities_name_and_queries.emplace_back(name, ast);
        }

        if (file_name.starts_with(resource_prefix) && file_name.ends_with(sql_suffix))
        {
            String name = unescapeForFileName(file_name.substr(
                resource_prefix.size(),
                file_name.size() - resource_prefix.size() - sql_suffix.size()));

            if (name.empty())
                continue;

            ASTPtr ast = tryLoadEntity(WorkloadEntityType::Resource, name, dir_path + it.name(), /* check_file_exists= */ false);
            if (ast)
                entities_name_and_queries.emplace_back(name, ast);
        }
    }

    setAllEntities(entities_name_and_queries);
    entities_loaded = true;

    LOG_DEBUG(log, "Workload entities loaded");
}


void WorkloadEntityDiskStorage::createDirectory()
{
    std::error_code create_dir_error_code;
    fs::create_directories(dir_path, create_dir_error_code);
    if (!fs::exists(dir_path) || !fs::is_directory(dir_path) || create_dir_error_code)
        throw Exception(ErrorCodes::DIRECTORY_DOESNT_EXIST, "Couldn't create directory {} reason: '{}'",
            dir_path, create_dir_error_code.message());
}


WorkloadEntityStorageBase::OperationResult WorkloadEntityDiskStorage::storeEntityImpl(
    const ContextPtr & /*current_context*/,
    WorkloadEntityType entity_type,
    const String & entity_name,
    ASTPtr create_entity_query,
    bool throw_if_exists,
    bool replace_if_exists,
    const Settings & settings)
{
    createDirectory();
    String file_path = getFilePath(entity_type, entity_name);
    LOG_DEBUG(log, "Storing workload entity {} to file {}", backQuote(entity_name), file_path);

    if (fs::exists(file_path))
    {
        if (throw_if_exists)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' already exists", entity_name);
        else if (!replace_if_exists)
            return OperationResult::Failed;
    }


    String temp_file_path = file_path + ".tmp";

    try
    {
        WriteBufferFromFile out(temp_file_path);
        formatAST(*create_entity_query, out, false);
        writeChar('\n', out);
        out.next();
        if (settings[Setting::fsync_metadata])
            out.sync();
        out.close();

        if (replace_if_exists)
            fs::rename(temp_file_path, file_path);
        else
            renameNoReplace(temp_file_path, file_path);
    }
    catch (...)
    {
        fs::remove(temp_file_path);
        throw;
    }

    LOG_TRACE(log, "Entity {} stored", backQuote(entity_name));
    return OperationResult::Ok;
}


WorkloadEntityStorageBase::OperationResult WorkloadEntityDiskStorage::removeEntityImpl(
    const ContextPtr & /*current_context*/,
    WorkloadEntityType entity_type,
    const String & entity_name,
    bool throw_if_not_exists)
{
    String file_path = getFilePath(entity_type, entity_name);
    LOG_DEBUG(log, "Removing workload entity {} stored in file {}", backQuote(entity_name), file_path);

    bool existed = fs::remove(file_path);

    if (!existed)
    {
        if (throw_if_not_exists)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' doesn't exist", entity_name);
        else
            return OperationResult::Failed;
    }

    LOG_TRACE(log, "Entity {} removed", backQuote(entity_name));
    return OperationResult::Ok;
}


String WorkloadEntityDiskStorage::getFilePath(WorkloadEntityType entity_type, const String & entity_name) const
{
    String file_path;
    switch (entity_type)
    {
        case WorkloadEntityType::Workload:
        {
            file_path = dir_path + "workload_" + escapeForFileName(entity_name) + ".sql";
            break;
        }
        case WorkloadEntityType::Resource:
        {
            file_path = dir_path + "resource_" + escapeForFileName(entity_name) + ".sql";
            break;
        }
        case WorkloadEntityType::MAX: break;
    }
    return file_path;
}

}
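The resulting on-disk layout, illustrated with hypothetical entity names and paths (the real code additionally escapes names via `escapeForFileName`):

``` cpp
#include <iostream>
#include <string>

// Each entity becomes one SQL file under the workload_path directory,
// prefixed by its kind, exactly as getFilePath() composes above.
std::string filePathFor(const std::string & dir, bool is_workload, const std::string & name)
{
    return dir + (is_workload ? "workload_" : "resource_") + name + ".sql";
}

int main()
{
    std::cout << filePathFor("/var/lib/clickhouse/workload/", true, "production") << '\n';
    // /var/lib/clickhouse/workload/workload_production.sql
    std::cout << filePathFor("/var/lib/clickhouse/workload/", false, "io_read") << '\n';
    // /var/lib/clickhouse/workload/resource_io_read.sql
}
```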
44 src/Common/Scheduler/Workload/WorkloadEntityDiskStorage.h Normal file
@@ -0,0 +1,44 @@
#pragma once

#include <Common/Scheduler/Workload/WorkloadEntityStorageBase.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST_fwd.h>


namespace DB
{

/// Loads workload entities from a specified folder.
class WorkloadEntityDiskStorage : public WorkloadEntityStorageBase
{
public:
    WorkloadEntityDiskStorage(const ContextPtr & global_context_, const String & dir_path_);
    void loadEntities() override;

private:
    OperationResult storeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        ASTPtr create_entity_query,
        bool throw_if_exists,
        bool replace_if_exists,
        const Settings & settings) override;

    OperationResult removeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        bool throw_if_not_exists) override;

    void createDirectory();
    void loadEntitiesImpl();
    ASTPtr tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name);
    ASTPtr tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name, const String & file_path, bool check_file_exists);
    String getFilePath(WorkloadEntityType entity_type, const String & entity_name) const;

    String dir_path;
    std::atomic<bool> entities_loaded = false;
};

}
273 src/Common/Scheduler/Workload/WorkloadEntityKeeperStorage.cpp Normal file
@@ -0,0 +1,273 @@
#include <Common/Scheduler/Workload/WorkloadEntityKeeperStorage.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateWorkloadQuery.h>
#include <Parsers/ASTCreateResourceQuery.h>
#include <Parsers/ParserCreateWorkloadEntity.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <base/sleep.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
#include <Core/Settings.h>

namespace DB
{
namespace Setting
{
    extern const SettingsUInt64 max_parser_backtracks;
    extern const SettingsUInt64 max_parser_depth;
}

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
}

WorkloadEntityKeeperStorage::WorkloadEntityKeeperStorage(
    const ContextPtr & global_context_, const String & zookeeper_path_)
    : WorkloadEntityStorageBase(global_context_)
    , zookeeper_getter{[global_context_]() { return global_context_->getZooKeeper(); }}
    , zookeeper_path{zookeeper_path_}
    , watch{std::make_shared<WatchEvent>()}
{
    log = getLogger("WorkloadEntityKeeperStorage");
    if (zookeeper_path.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path must be non-empty");

    if (zookeeper_path.back() == '/')
        zookeeper_path.pop_back();

    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
    if (zookeeper_path.front() != '/')
        zookeeper_path = "/" + zookeeper_path;
}

WorkloadEntityKeeperStorage::~WorkloadEntityKeeperStorage()
{
    SCOPE_EXIT_SAFE(stopWatchingThread());
}

void WorkloadEntityKeeperStorage::startWatchingThread()
{
    if (!watching_flag.exchange(true))
        watching_thread = ThreadFromGlobalPool(&WorkloadEntityKeeperStorage::processWatchQueue, this);
}

void WorkloadEntityKeeperStorage::stopWatchingThread()
{
    if (watching_flag.exchange(false))
    {
        watch->cv.notify_one();
        if (watching_thread.joinable())
            watching_thread.join();
    }
}

zkutil::ZooKeeperPtr WorkloadEntityKeeperStorage::getZooKeeper()
{
    auto [zookeeper, session_status] = zookeeper_getter.getZooKeeper();

    if (session_status == zkutil::ZooKeeperCachingGetter::SessionStatus::New)
    {
        /// It's possible that we connected to different [Zoo]Keeper instance
        /// so we may read a bit stale state.
        zookeeper->sync(zookeeper_path);

        createRootNodes(zookeeper);

        auto lock = getLock();
        refreshEntities(zookeeper);
    }

    return zookeeper;
}

void WorkloadEntityKeeperStorage::loadEntities()
{
    /// loadEntities() is called at start from Server::main(), so it's better not to stop here on no connection to ZooKeeper or any other error.
    /// However the watching thread must be started anyway in case the connection will be established later.
    try
    {
        auto lock = getLock();
        refreshEntities(getZooKeeper());
    }
    catch (...)
    {
        tryLogCurrentException(log, "Failed to load workload entities");
    }
    startWatchingThread();
}


void WorkloadEntityKeeperStorage::processWatchQueue()
{
    LOG_DEBUG(log, "Started watching thread");
    setThreadName("WrkldEntWatch");

    UInt64 handled = 0;
    while (watching_flag)
    {
        try
        {
            /// Re-initialize ZooKeeper session if expired
            getZooKeeper();

            {
                std::unique_lock lock{watch->mutex};
                if (!watch->cv.wait_for(lock, std::chrono::seconds(10), [&] { return !watching_flag || handled != watch->triggered; }))
                    continue;
                handled = watch->triggered;
            }

            auto lock = getLock();
            refreshEntities(getZooKeeper());
        }
        catch (...)
        {
            tryLogCurrentException(log, "Will try to restart watching thread after error");
            zookeeper_getter.resetCache();
            sleepForSeconds(5);
        }
    }

    LOG_DEBUG(log, "Stopped watching thread");
}


void WorkloadEntityKeeperStorage::stopWatching()
{
    stopWatchingThread();
}

void WorkloadEntityKeeperStorage::createRootNodes(const zkutil::ZooKeeperPtr & zookeeper)
{
    zookeeper->createAncestors(zookeeper_path);
    // If node does not exist we consider it to be equal to empty node: no workload entities
    zookeeper->createIfNotExists(zookeeper_path, "");
}

WorkloadEntityStorageBase::OperationResult WorkloadEntityKeeperStorage::storeEntityImpl(
    const ContextPtr & /*current_context*/,
    WorkloadEntityType entity_type,
    const String & entity_name,
    ASTPtr create_entity_query,
    bool /*throw_if_exists*/,
    bool /*replace_if_exists*/,
    const Settings &)
{
    LOG_DEBUG(log, "Storing workload entity {}", backQuote(entity_name));

    String new_data = serializeAllEntities(Event{entity_type, entity_name, create_entity_query});
    auto zookeeper = getZooKeeper();

    Coordination::Stat stat;
    auto code = zookeeper->trySet(zookeeper_path, new_data, current_version, &stat);
    if (code != Coordination::Error::ZOK)
    {
        refreshEntities(zookeeper);
        return OperationResult::Retry;
    }

    current_version = stat.version;

    LOG_DEBUG(log, "Workload entity {} stored", backQuote(entity_name));

    return OperationResult::Ok;
}


WorkloadEntityStorageBase::OperationResult WorkloadEntityKeeperStorage::removeEntityImpl(
    const ContextPtr & /*current_context*/,
    WorkloadEntityType entity_type,
    const String & entity_name,
    bool /*throw_if_not_exists*/)
{
    LOG_DEBUG(log, "Removing workload entity {}", backQuote(entity_name));

    String new_data = serializeAllEntities(Event{entity_type, entity_name, {}});
    auto zookeeper = getZooKeeper();

    Coordination::Stat stat;
    auto code = zookeeper->trySet(zookeeper_path, new_data, current_version, &stat);
    if (code != Coordination::Error::ZOK)
    {
        refreshEntities(zookeeper);
        return OperationResult::Retry;
    }

    current_version = stat.version;

    LOG_DEBUG(log, "Workload entity {} removed", backQuote(entity_name));

    return OperationResult::Ok;
}

std::pair<String, Int32> WorkloadEntityKeeperStorage::getDataAndSetWatch(const zkutil::ZooKeeperPtr & zookeeper)
{
    const auto data_watcher = [my_watch = watch](const Coordination::WatchResponse & response)
    {
        if (response.type == Coordination::Event::CHANGED)
        {
            std::unique_lock lock{my_watch->mutex};
            my_watch->triggered++;
            my_watch->cv.notify_one();
        }
    };

    Coordination::Stat stat;
    String data;
    bool exists = zookeeper->tryGetWatch(zookeeper_path, data, &stat, data_watcher);
    if (!exists)
    {
        createRootNodes(zookeeper);
        data = zookeeper->getWatch(zookeeper_path, &stat, data_watcher);
    }
    return {data, stat.version};
}

void WorkloadEntityKeeperStorage::refreshEntities(const zkutil::ZooKeeperPtr & zookeeper)
{
    auto [data, version] = getDataAndSetWatch(zookeeper);
    if (version == current_version)
        return;

    LOG_DEBUG(log, "Refreshing workload entities from keeper");
    ASTs queries;
    ParserCreateWorkloadEntity parser;
    const char * begin = data.data(); /// begin of current query
    const char * pos = begin; /// parser moves pos from begin to the end of current query
    const char * end = begin + data.size();
    while (pos < end)
    {
        queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS));
        while (isWhitespaceASCII(*pos) || *pos == ';')
            ++pos;
    }

    /// Read and parse all SQL entities from data we just read from ZooKeeper
    std::vector<std::pair<String, ASTPtr>> new_entities;
    for (const auto & query : queries)
    {
        LOG_TRACE(log, "Read keeper entity definition: {}", serializeAST(*query));
        if (auto * create_workload_query = query->as<ASTCreateWorkloadQuery>())
            new_entities.emplace_back(create_workload_query->getWorkloadName(), query);
        else if (auto * create_resource_query = query->as<ASTCreateResourceQuery>())
            new_entities.emplace_back(create_resource_query->getResourceName(), query);
        else
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid workload entity query in keeper storage: {}", query->getID());
    }

    setAllEntities(new_entities);
    current_version = version;

    LOG_DEBUG(log, "Workload entities refreshing is done");
}

}
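Both `storeEntityImpl` and `removeEntityImpl` above rely on optimistic concurrency: the whole entity set is serialized into one znode, writes carry the expected version, and a conflict makes the caller refresh and retry. The scheme in isolation, with a toy in-memory node standing in for ZooKeeper (data strings are placeholders):

``` cpp
#include <optional>
#include <string>

// Toy stand-in for a versioned znode: set succeeds only against the
// expected version, exactly like zookeeper->trySet() above.
struct FakeNode
{
    std::string data;
    int version = 0;

    std::optional<int> trySet(const std::string & new_data, int expected_version)
    {
        if (expected_version != version)
            return std::nullopt; // conflict: caller must refresh and retry
        data = new_data;
        return ++version;
    }
};

int main()
{
    FakeNode node;
    int my_version = 0;

    node.trySet("all definitions, v1", 0); // another writer bumps version to 1

    while (true)
    {
        if (auto v = node.trySet("all definitions, v2", my_version))
            break; // committed atomically against the version we validated
        my_version = node.version; // refresh local state, then retry
    }
}
```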
71 src/Common/Scheduler/Workload/WorkloadEntityKeeperStorage.h Normal file
@@ -0,0 +1,71 @@
#pragma once

#include <Common/Scheduler/Workload/WorkloadEntityStorageBase.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST_fwd.h>
#include <Common/ThreadPool.h>
#include <Common/ZooKeeper/ZooKeeperCachingGetter.h>

#include <condition_variable>
#include <mutex>

namespace DB
{

/// Loads RESOURCE and WORKLOAD sql objects from Keeper.
class WorkloadEntityKeeperStorage : public WorkloadEntityStorageBase
{
public:
    WorkloadEntityKeeperStorage(const ContextPtr & global_context_, const String & zookeeper_path_);
    ~WorkloadEntityKeeperStorage() override;

    bool isReplicated() const override { return true; }
    String getReplicationID() const override { return zookeeper_path; }

    void loadEntities() override;
    void stopWatching() override;

private:
    OperationResult storeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        ASTPtr create_entity_query,
        bool throw_if_exists,
        bool replace_if_exists,
        const Settings & settings) override;

    OperationResult removeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        bool throw_if_not_exists) override;

    void processWatchQueue();

    zkutil::ZooKeeperPtr getZooKeeper();

    void startWatchingThread();
    void stopWatchingThread();

    void createRootNodes(const zkutil::ZooKeeperPtr & zookeeper);
    std::pair<String, Int32> getDataAndSetWatch(const zkutil::ZooKeeperPtr & zookeeper);
    void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper);

    zkutil::ZooKeeperCachingGetter zookeeper_getter;
    String zookeeper_path;
    Int32 current_version = 0;

    ThreadFromGlobalPool watching_thread;
    std::atomic<bool> watching_flag = false;

    struct WatchEvent
    {
        std::mutex mutex;
        std::condition_variable cv;
        UInt64 triggered = 0;
    };
    std::shared_ptr<WatchEvent> watch;
};

}
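The `WatchEvent` counter handshake above guarantees that notifications arriving while the watcher is busy are not lost: the callback only bumps `triggered`, and the watching thread compares it with its own `handled` count. A minimal standalone version of the same idea:

``` cpp
#include <condition_variable>
#include <mutex>

struct WatchEvent
{
    std::mutex mutex;
    std::condition_variable cv;
    unsigned long triggered = 0;
};

// callback side (e.g. a keeper watch firing):
void onChanged(WatchEvent & watch)
{
    std::unique_lock lock(watch.mutex);
    ++watch.triggered; // coalesced notifications still advance the counter
    watch.cv.notify_one();
}

// watching thread side: returns once there is something new to handle
void waitForChange(WatchEvent & watch, unsigned long & handled)
{
    std::unique_lock lock(watch.mutex);
    watch.cv.wait(lock, [&] { return handled != watch.triggered; });
    handled = watch.triggered; // everything up to this point is now handled
}
```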
773 src/Common/Scheduler/Workload/WorkloadEntityStorageBase.cpp Normal file
@@ -0,0 +1,773 @@
#include <Common/Scheduler/Workload/WorkloadEntityStorageBase.h>

#include <Common/Scheduler/SchedulingSettings.h>
#include <Common/logger_useful.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateWorkloadQuery.h>
#include <Parsers/ASTCreateResourceQuery.h>
#include <Parsers/formatAST.h>
#include <IO/WriteBufferFromString.h>

#include <boost/container/flat_set.hpp>
#include <boost/range/algorithm/copy.hpp>

#include <mutex>
#include <queue>
#include <unordered_set>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
}

namespace
{

/// Removes details from a CREATE query to be used as workload entity definition
ASTPtr normalizeCreateWorkloadEntityQuery(const IAST & create_query)
{
    auto ptr = create_query.clone();
    if (auto * res = typeid_cast<ASTCreateWorkloadQuery *>(ptr.get()))
    {
        res->if_not_exists = false;
        res->or_replace = false;
    }
    if (auto * res = typeid_cast<ASTCreateResourceQuery *>(ptr.get()))
    {
        res->if_not_exists = false;
        res->or_replace = false;
    }
    return ptr;
}

/// Returns a type of a workload entity `ptr`
WorkloadEntityType getEntityType(const ASTPtr & ptr)
{
    if (auto * res = typeid_cast<ASTCreateWorkloadQuery *>(ptr.get()))
        return WorkloadEntityType::Workload;
    if (auto * res = typeid_cast<ASTCreateResourceQuery *>(ptr.get()))
        return WorkloadEntityType::Resource;
    chassert(false);
    return WorkloadEntityType::MAX;
}

bool entityEquals(const ASTPtr & lhs, const ASTPtr & rhs)
{
    if (auto * a = typeid_cast<ASTCreateWorkloadQuery *>(lhs.get()))
    {
        if (auto * b = typeid_cast<ASTCreateWorkloadQuery *>(rhs.get()))
        {
            return std::forward_as_tuple(a->getWorkloadName(), a->getWorkloadParent(), a->changes)
                == std::forward_as_tuple(b->getWorkloadName(), b->getWorkloadParent(), b->changes);
        }
    }
    if (auto * a = typeid_cast<ASTCreateResourceQuery *>(lhs.get()))
    {
        if (auto * b = typeid_cast<ASTCreateResourceQuery *>(rhs.get()))
            return std::forward_as_tuple(a->getResourceName(), a->operations)
                == std::forward_as_tuple(b->getResourceName(), b->operations);
    }
    return false;
}

/// Workload entities could reference each other.
/// This enum defines all possible reference types
enum class ReferenceType
{
    Parent, // Source workload references target workload as a parent
    ForResource // Source workload references target resource in its `SETTINGS x = y FOR resource` clause
};

/// Runs a `func` callback for every reference from `source` to `target`.
/// This function is the source of truth defining what `target` references are stored in a workload `source_entity`
void forEachReference(
    const ASTPtr & source_entity,
    std::function<void(const String & target, const String & source, ReferenceType type)> func)
{
    if (auto * res = typeid_cast<ASTCreateWorkloadQuery *>(source_entity.get()))
    {
        // Parent reference
        String parent = res->getWorkloadParent();
        if (!parent.empty())
            func(parent, res->getWorkloadName(), ReferenceType::Parent);

        // References to RESOURCEs mentioned in SETTINGS clause after FOR keyword
        std::unordered_set<String> resources;
        for (const auto & [name, value, resource] : res->changes)
        {
            if (!resource.empty())
                resources.insert(resource);
        }
        for (const String & resource : resources)
            func(resource, res->getWorkloadName(), ReferenceType::ForResource);
    }
    if (auto * res = typeid_cast<ASTCreateResourceQuery *>(source_entity.get()))
    {
        // RESOURCE has no references to be validated, we allow mentioned disks to be created later
    }
}

/// Helper for recursive DFS
void topologicallySortedWorkloadsImpl(const String & name, const ASTPtr & ast, const std::unordered_map<String, ASTPtr> & workloads, std::unordered_set<String> & visited, std::vector<std::pair<String, ASTPtr>> & sorted_workloads)
{
    if (visited.contains(name))
        return;
    visited.insert(name);

    // Recurse into parent (if any)
    String parent = typeid_cast<ASTCreateWorkloadQuery *>(ast.get())->getWorkloadParent();
    if (!parent.empty())
    {
        auto parent_iter = workloads.find(parent);
        if (parent_iter == workloads.end())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Workload metadata inconsistency: Workload '{}' parent '{}' does not exist. This must be fixed manually.", name, parent);
        topologicallySortedWorkloadsImpl(parent, parent_iter->second, workloads, visited, sorted_workloads);
    }

    sorted_workloads.emplace_back(name, ast);
}

/// Returns pairs {workload_name, create_workload_ast} in order that respects child-parent relation (parent first, then children)
std::vector<std::pair<String, ASTPtr>> topologicallySortedWorkloads(const std::unordered_map<String, ASTPtr> & workloads)
{
    std::vector<std::pair<String, ASTPtr>> sorted_workloads;
    std::unordered_set<String> visited;
    for (const auto & [name, ast] : workloads)
        topologicallySortedWorkloadsImpl(name, ast, workloads, visited, sorted_workloads);
    return sorted_workloads;
}

/// Helper for recursive DFS
void topologicallySortedDependenciesImpl(
    const String & name,
    const std::unordered_map<String, std::unordered_set<String>> & dependencies,
    std::unordered_set<String> & visited,
    std::vector<String> & result)
{
    if (visited.contains(name))
        return;
    visited.insert(name);

    if (auto it = dependencies.find(name); it != dependencies.end())
    {
        for (const String & dep : it->second)
            topologicallySortedDependenciesImpl(dep, dependencies, visited, result);
    }

    result.emplace_back(name);
}

/// Returns nodes in topological order that respects `dependencies` (key is node name, value is set of dependencies)
std::vector<String> topologicallySortedDependencies(const std::unordered_map<String, std::unordered_set<String>> & dependencies)
{
    std::unordered_set<String> visited; // Set to track visited nodes
    std::vector<String> result; // Result to store nodes in topologically sorted order

    // Perform DFS for each node in the graph
    for (const auto & [name, _] : dependencies)
        topologicallySortedDependenciesImpl(name, dependencies, visited, result);

    return result;
}

/// Represents a change of a workload entity (WORKLOAD or RESOURCE)
struct EntityChange
{
    String name; /// Name of entity
    ASTPtr before; /// Entity before change (CREATE if not set)
    ASTPtr after; /// Entity after change (DROP if not set)

    std::vector<IWorkloadEntityStorage::Event> toEvents() const
    {
        if (!after)
            return {{getEntityType(before), name, {}}};
        else if (!before)
            return {{getEntityType(after), name, after}};
        else
        {
            auto type_before = getEntityType(before);
            auto type_after = getEntityType(after);
            // If type changed, we have to remove an old entity and add a new one
            if (type_before != type_after)
                return {{type_before, name, {}}, {type_after, name, after}};
            else
                return {{type_after, name, after}};
        }
    }
};

/// Returns `changes` ordered for execution.
/// Every intermediate state during execution will be consistent (i.e. all references will be valid)
/// NOTE: It does not validate changes, any problem will be detected during execution.
/// NOTE: There will be no error if valid order does not exist.
std::vector<EntityChange> topologicallySortedChanges(const std::vector<EntityChange> & changes)
{
    // Construct map from entity name into entity change
    std::unordered_map<String, const EntityChange *> change_by_name;
    for (const auto & change : changes)
        change_by_name[change.name] = &change;

    // Construct references maps (before changes and after changes)
    std::unordered_map<String, std::unordered_set<String>> old_sources; // Key is target. Value is set of names of source entities.
    std::unordered_map<String, std::unordered_set<String>> new_targets; // Key is source. Value is set of names of target entities.
    for (const auto & change : changes)
    {
        if (change.before)
        {
            forEachReference(change.before,
                [&] (const String & target, const String & source, ReferenceType)
                {
                    old_sources[target].insert(source);
                });
        }
        if (change.after)
        {
            forEachReference(change.after,
                [&] (const String & target, const String & source, ReferenceType)
                {
                    new_targets[source].insert(target);
                });
        }
    }

    // There are consistency rules that regulate order in which changes must be applied (see below).
    // Construct DAG of dependencies between changes.
    std::unordered_map<String, std::unordered_set<String>> dependencies; // Key is entity name. Value is set of names of entities that should be changed first.
    for (const auto & change : changes)
    {
        dependencies.emplace(change.name, std::unordered_set<String>{}); // Make sure we create nodes that have no dependencies
        for (const auto & event : change.toEvents())
        {
            if (!event.entity) // DROP
            {
                // Rule 1: Entity can only be removed after all existing references to it are removed as well.
                for (const String & source : old_sources[event.name])
                {
                    if (change_by_name.contains(source))
                        dependencies[event.name].insert(source);
                }
            }
            else // CREATE || CREATE OR REPLACE
            {
                // Rule 2: Entity can only be created after all entities it references are created as well.
                for (const String & target : new_targets[event.name])
                {
                    if (auto it = change_by_name.find(target); it != change_by_name.end())
                    {
                        const EntityChange & target_change = *it->second;
                        // If target is creating, it should be created first.
                        // (But if target is updating, there is no dependency).
                        if (!target_change.before)
                            dependencies[event.name].insert(target);
                    }
                }
            }
        }
    }

    // Topological sort of changes to respect consistency rules
    std::vector<EntityChange> result;
    for (const String & name : topologicallySortedDependencies(dependencies))
        result.push_back(*change_by_name[name]);

    return result;
}

}
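A worked example of the ordering rules above, using hypothetical workload names: creating workload `dev` whose parent `all` is also being created means `all` must be applied first (rule 2); a DROP runs in the opposite direction (rule 1). This standalone DFS mirrors `topologicallySortedDependencies()`:

``` cpp
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using Deps = std::unordered_map<std::string, std::unordered_set<std::string>>;

// Post-order DFS: a node is emitted only after all of its dependencies.
void visit(const std::string & name, const Deps & deps,
           std::unordered_set<std::string> & visited, std::vector<std::string> & out)
{
    if (!visited.insert(name).second)
        return;
    if (auto it = deps.find(name); it != deps.end())
        for (const auto & dep : it->second)
            visit(dep, deps, visited, out);
    out.push_back(name);
}

int main()
{
    Deps deps = {{"dev", {"all"}}, {"all", {}}}; // `dev` must be created after `all`
    std::unordered_set<std::string> visited;
    std::vector<std::string> order;
    for (const auto & [name, _] : deps)
        visit(name, deps, visited, order);
    // order == {"all", "dev"} regardless of map iteration order
}
```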
||||
WorkloadEntityStorageBase::WorkloadEntityStorageBase(ContextPtr global_context_)
|
||||
: handlers(std::make_shared<Handlers>())
|
||||
, global_context(std::move(global_context_))
|
||||
, log{getLogger("WorkloadEntityStorage")} // could be overridden in derived class
|
||||
{}
|
||||
|
||||
ASTPtr WorkloadEntityStorageBase::get(const String & entity_name) const
|
||||
{
|
||||
if (auto result = tryGet(entity_name))
|
||||
return result;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The workload entity name '{}' is not saved",
|
||||
entity_name);
|
||||
}
|
||||
|
||||
ASTPtr WorkloadEntityStorageBase::tryGet(const String & entity_name) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto it = entities.find(entity_name);
|
||||
if (it == entities.end())
|
||||
return nullptr;
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::has(const String & entity_name) const
|
||||
{
|
||||
return tryGet(entity_name) != nullptr;
|
||||
}
|
||||
|
||||
std::vector<String> WorkloadEntityStorageBase::getAllEntityNames() const
|
||||
{
|
||||
std::vector<String> entity_names;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
entity_names.reserve(entities.size());
|
||||
|
||||
for (const auto & [name, _] : entities)
|
||||
entity_names.emplace_back(name);
|
||||
|
||||
return entity_names;
|
||||
}
|
||||
|
||||
std::vector<String> WorkloadEntityStorageBase::getAllEntityNames(WorkloadEntityType entity_type) const
|
||||
{
|
||||
std::vector<String> entity_names;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
for (const auto & [name, entity] : entities)
|
||||
{
|
||||
if (getEntityType(entity) == entity_type)
|
||||
entity_names.emplace_back(name);
|
||||
}
|
||||
|
||||
return entity_names;
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return entities.empty();
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::storeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings)
|
||||
{
|
||||
if (entity_name.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity name should not be empty.");
|
||||
|
||||
create_entity_query = normalizeCreateWorkloadEntityQuery(*create_entity_query);
|
||||
auto * workload = typeid_cast<ASTCreateWorkloadQuery *>(create_entity_query.get());
|
||||
auto * resource = typeid_cast<ASTCreateResourceQuery *>(create_entity_query.get());
|
||||
|
||||
while (true)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
|
||||
ASTPtr old_entity; // entity to be REPLACED
|
||||
if (auto it = entities.find(entity_name); it != entities.end())
|
||||
{
|
||||
if (throw_if_exists)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' already exists", entity_name);
|
||||
else if (!replace_if_exists)
|
||||
return false;
|
||||
else
|
||||
old_entity = it->second;
|
||||
}
|
||||
|
||||
// Validate CREATE OR REPLACE
|
||||
if (old_entity)
|
||||
{
|
||||
auto * old_workload = typeid_cast<ASTCreateWorkloadQuery *>(old_entity.get());
|
||||
auto * old_resource = typeid_cast<ASTCreateResourceQuery *>(old_entity.get());
|
||||
if (workload && !old_workload)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' already exists, but it is not a workload", entity_name);
|
||||
if (resource && !old_resource)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' already exists, but it is not a resource", entity_name);
|
||||
if (workload && !old_workload->hasParent() && workload->hasParent())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "It is not allowed to remove root workload");
|
||||
}
|
||||
|
||||
// Validate workload
|
||||
if (workload)
|
||||
{
|
||||
if (!workload->hasParent())
|
||||
{
|
||||
if (!root_name.empty() && root_name != workload->getWorkloadName())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second root is not allowed. You should probably add 'PARENT {}' clause.", root_name);
|
||||
}
|
||||
|
||||
SchedulingSettings validator;
|
||||
validator.updateFromChanges(workload->changes);
|
||||
}
|
||||
|
||||
forEachReference(create_entity_query,
|
||||
[this, workload] (const String & target, const String & source, ReferenceType type)
|
||||
{
|
||||
if (auto it = entities.find(target); it == entities.end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' references another workload entity '{}' that doesn't exist", source, target);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case ReferenceType::Parent:
|
||||
{
|
||||
if (typeid_cast<ASTCreateWorkloadQuery *>(entities[target].get()) == nullptr)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload parent should reference another workload, not '{}'.", target);
|
||||
break;
|
||||
}
|
||||
case ReferenceType::ForResource:
|
||||
{
|
||||
if (typeid_cast<ASTCreateResourceQuery *>(entities[target].get()) == nullptr)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload settings should reference resource in FOR clause, not '{}'.", target);
|
||||
|
||||
// Validate that we could parse the settings for specific resource
|
||||
SchedulingSettings validator;
|
||||
validator.updateFromChanges(workload->changes, target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Detect reference cycles.
|
||||
// The only way to create a cycle is to add an edge that will be a part of a new cycle.
|
||||
// We are going to add an edge: `source` -> `target`, so we ensure there is no path back `target` -> `source`.
|
||||
if (isIndirectlyReferenced(source, target))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity cycles are not allowed");
|
||||
});
|
||||
|
||||
auto result = storeEntityImpl(
|
||||
current_context,
|
||||
entity_type,
|
||||
entity_name,
|
||||
create_entity_query,
|
||||
throw_if_exists,
|
||||
replace_if_exists,
|
||||
settings);
|
||||
|
||||
if (result == OperationResult::Retry)
|
||||
continue; // Entities were updated, we need to rerun all the validations
|
||||
|
||||
if (result == OperationResult::Ok)
|
||||
{
|
||||
Event event{entity_type, entity_name, create_entity_query};
|
||||
applyEvent(lock, event);
|
||||
unlockAndNotify(lock, {std::move(event)});
|
||||
}
|
||||
|
||||
return result == OperationResult::Ok;
|
||||
}
|
||||
}
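
/// Illustration only (not part of the diff): a minimal sketch of how a caller might
/// drive the retry loop above. The function, the entity name, and the way `settings`
/// is obtained are assumptions, not code from this commit.
static void exampleStoreWorkload(IWorkloadEntityStorage & storage, const ContextPtr & context, const ASTPtr & ast, const Settings & settings)
{
    [[maybe_unused]] bool stored = storage.storeEntity(
        context,
        WorkloadEntityType::Workload,
        "production",                   // hypothetical entity name
        ast,                            // parsed CREATE WORKLOAD query
        /* throw_if_exists = */ false,
        /* replace_if_exists = */ true, // CREATE OR REPLACE semantics
        settings);
}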

bool WorkloadEntityStorageBase::removeEntity(
    const ContextPtr & current_context,
    WorkloadEntityType entity_type,
    const String & entity_name,
    bool throw_if_not_exists)
{
    while (true)
    {
        std::unique_lock lock(mutex);
        auto it = entities.find(entity_name);
        if (it == entities.end())
        {
            if (throw_if_not_exists)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' doesn't exist", entity_name);
            else
                return false;
        }

        if (auto reference_it = references.find(entity_name); reference_it != references.end())
        {
            String names;
            for (const String & name : reference_it->second)
                names += " " + name;
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Workload entity '{}' cannot be dropped. It is referenced by:{}", entity_name, names);
        }

        auto result = removeEntityImpl(
            current_context,
            entity_type,
            entity_name,
            throw_if_not_exists);

        if (result == OperationResult::Retry)
            continue; // Entities were updated, we need to rerun all the validations

        if (result == OperationResult::Ok)
        {
            Event event{entity_type, entity_name, {}};
            applyEvent(lock, event);
            unlockAndNotify(lock, {std::move(event)});
        }

        return result == OperationResult::Ok;
    }
}

scope_guard WorkloadEntityStorageBase::getAllEntitiesAndSubscribe(const OnChangedHandler & handler)
{
    scope_guard result;

    std::vector<Event> current_state;
    {
        std::lock_guard lock{mutex};
        current_state = orderEntities(entities);

        std::lock_guard lock2{handlers->mutex};
        handlers->list.push_back(handler);
        auto handler_it = std::prev(handlers->list.end());
        result = [my_handlers = handlers, handler_it]
        {
            std::lock_guard lock3{my_handlers->mutex};
            my_handlers->list.erase(handler_it);
        };
    }

    // On subscription the handler is immediately called with all entities that are already loaded (or later, once they are loaded)
    handler(current_state);

    return result;
}
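
/// Illustration only (not part of the diff): an Event whose AST is null denotes a DROP.
/// A subscriber can branch on that; `applyDefinition`/`removeDefinition` are hypothetical.
///
///     auto subscription = storage.getAllEntitiesAndSubscribe(
///         [] (const std::vector<IWorkloadEntityStorage::Event> & events)
///         {
///             for (const auto & [entity_type, entity_name, entity] : events)
///             {
///                 if (entity) // CREATE or CREATE OR REPLACE
///                     applyDefinition(entity_type, entity_name, entity);
///                 else        // DROP
///                     removeDefinition(entity_type, entity_name);
///             }
///         });
///     // Destroying `subscription` (a scope_guard) unregisters the handler.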

void WorkloadEntityStorageBase::unlockAndNotify(
    std::unique_lock<std::recursive_mutex> & lock,
    std::vector<Event> tx)
{
    if (tx.empty())
        return;

    std::vector<OnChangedHandler> current_handlers;
    {
        std::lock_guard handlers_lock{handlers->mutex};
        boost::range::copy(handlers->list, std::back_inserter(current_handlers));
    }

    lock.unlock();

    for (const auto & handler : current_handlers)
    {
        try
        {
            handler(tx);
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    }
}

std::unique_lock<std::recursive_mutex> WorkloadEntityStorageBase::getLock() const
{
    return std::unique_lock{mutex};
}

void WorkloadEntityStorageBase::setAllEntities(const std::vector<std::pair<String, ASTPtr>> & raw_new_entities)
{
    std::unordered_map<String, ASTPtr> new_entities;
    for (const auto & [entity_name, create_query] : raw_new_entities)
        new_entities[entity_name] = normalizeCreateWorkloadEntityQuery(*create_query);

    std::unique_lock lock(mutex);

    // Fill vector of `changes` based on difference between current `entities` and `new_entities`
    std::vector<EntityChange> changes;
    for (const auto & [entity_name, entity] : entities)
    {
        if (auto it = new_entities.find(entity_name); it != new_entities.end())
        {
            if (!entityEquals(entity, it->second))
            {
                changes.emplace_back(entity_name, entity, it->second); // Update entities that are present in both `new_entities` and `entities`
                LOG_TRACE(log, "Entity {} was updated", entity_name);
            }
            else
                LOG_TRACE(log, "Entity {} is the same", entity_name);
        }
        else
        {
            changes.emplace_back(entity_name, entity, ASTPtr{}); // Remove entities that are not present in `new_entities`
            LOG_TRACE(log, "Entity {} was dropped", entity_name);
        }
    }
    for (const auto & [entity_name, entity] : new_entities)
    {
        if (!entities.contains(entity_name))
        {
            changes.emplace_back(entity_name, ASTPtr{}, entity); // Create entities that are only present in `new_entities`
            LOG_TRACE(log, "Entity {} was created", entity_name);
        }
    }

    // Sort `changes` to respect consistency of references and apply them one by one.
    std::vector<Event> tx;
    for (const auto & change : topologicallySortedChanges(changes))
    {
        for (const auto & event : change.toEvents())
        {
            // TODO(serxa): do validation and throw LOGICAL_ERROR if failed
            applyEvent(lock, event);
            tx.push_back(event);
        }
    }

    // Notify subscribers
    unlockAndNotify(lock, tx);
}

void WorkloadEntityStorageBase::applyEvent(
    std::unique_lock<std::recursive_mutex> &,
    const Event & event)
{
    if (event.entity) // CREATE || CREATE OR REPLACE
    {
        LOG_DEBUG(log, "Create or replace workload entity: {}", serializeAST(*event.entity));

        auto * workload = typeid_cast<ASTCreateWorkloadQuery *>(event.entity.get());

        // Remember the root workload
        if (workload && !workload->hasParent())
            root_name = workload->getWorkloadName();

        // Remove references of a replaced entity (only for CREATE OR REPLACE)
        if (auto it = entities.find(event.name); it != entities.end())
            removeReferences(it->second);

        // Insert references of created entity
        insertReferences(event.entity);

        // Store in memory
        entities[event.name] = event.entity;
    }
    else // DROP
    {
        auto it = entities.find(event.name);
        chassert(it != entities.end());

        LOG_DEBUG(log, "Drop workload entity: {}", event.name);

        if (event.name == root_name)
            root_name.clear();

        // Clean up references
        removeReferences(it->second);

        // Remove from memory
        entities.erase(it);
    }
}

std::vector<std::pair<String, ASTPtr>> WorkloadEntityStorageBase::getAllEntities() const
{
    std::lock_guard lock{mutex};
    std::vector<std::pair<String, ASTPtr>> all_entities;
    all_entities.reserve(entities.size());
    std::copy(entities.begin(), entities.end(), std::back_inserter(all_entities));
    return all_entities;
}

bool WorkloadEntityStorageBase::isIndirectlyReferenced(const String & target, const String & source)
{
    std::queue<String> bfs;
    std::unordered_set<String> visited;
    visited.insert(target);
    bfs.push(target);
    while (!bfs.empty())
    {
        String current = bfs.front();
        bfs.pop();
        if (current == source)
            return true;
        if (auto it = references.find(current); it != references.end())
        {
            for (const String & node : it->second)
            {
                if (visited.contains(node))
                    continue;
                visited.insert(node);
                bfs.push(node);
            }
        }
    }
    return false;
}
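
// Worked example of the check above (hypothetical entities):
//   CREATE WORKLOAD a                   -> no references yet
//   CREATE WORKLOAD b with parent a     -> references: {a -> {b}}
//   CREATE OR REPLACE WORKLOAD a with parent b
//     -> would add the edge a -> b; the BFS starts at 'a', follows the stored
//        edge a -> {b}, reaches 'b', and the query is rejected with
//        "Workload entity cycles are not allowed".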

void WorkloadEntityStorageBase::insertReferences(const ASTPtr & entity)
{
    if (!entity)
        return;
    forEachReference(entity,
        [this] (const String & target, const String & source, ReferenceType)
        {
            references[target].insert(source);
        });
}

void WorkloadEntityStorageBase::removeReferences(const ASTPtr & entity)
{
    if (!entity)
        return;
    forEachReference(entity,
        [this] (const String & target, const String & source, ReferenceType)
        {
            references[target].erase(source);
            if (references[target].empty())
                references.erase(target);
        });
}

std::vector<WorkloadEntityStorageBase::Event> WorkloadEntityStorageBase::orderEntities(
    const std::unordered_map<String, ASTPtr> & all_entities,
    std::optional<Event> change)
{
    std::vector<Event> result;

    std::unordered_map<String, ASTPtr> workloads;
    for (const auto & [entity_name, ast] : all_entities)
    {
        if (typeid_cast<ASTCreateWorkloadQuery *>(ast.get()))
        {
            if (change && change->name == entity_name)
                continue; // Skip this workload if it is removed or updated
            workloads.emplace(entity_name, ast);
        }
        else if (typeid_cast<ASTCreateResourceQuery *>(ast.get()))
        {
            if (change && change->name == entity_name)
                continue; // Skip this resource if it is removed or updated
            // Resources should go first because workloads could reference them
            result.emplace_back(WorkloadEntityType::Resource, entity_name, ast);
        }
        else
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid workload entity type '{}'", ast->getID());
    }

    // Introduce the new entity described by `change`
    if (change && change->entity)
    {
        if (change->type == WorkloadEntityType::Workload)
            workloads.emplace(change->name, change->entity);
        else if (change->type == WorkloadEntityType::Resource)
            result.emplace_back(WorkloadEntityType::Resource, change->name, change->entity);
    }

    // Workloads should go in an order such that every child is enlisted only after its parent
    for (auto & [entity_name, ast] : topologicallySortedWorkloads(workloads))
        result.emplace_back(WorkloadEntityType::Workload, entity_name, ast);

    return result;
}

String WorkloadEntityStorageBase::serializeAllEntities(std::optional<Event> change)
{
    std::unique_lock lock{mutex}; // `mutex` is recursive, so it is safe to lock it again here
    auto ordered_entities = orderEntities(entities, change);
    WriteBufferFromOwnString buf;
    for (const auto & event : ordered_entities)
    {
        formatAST(*event.entity, buf, false, true);
        buf.write(";\n", 2);
    }
    return buf.str();
}

}
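
// Illustrative serializeAllEntities() output (a sketch; the definitions below are
// made up, only the ordering and the ";\n" terminator come from the code above):
//   CREATE RESOURCE net_read (READ DISK backup_disk);
//   CREATE WORKLOAD all;
//   CREATE WORKLOAD production IN all;
// Resources come first, then workloads with parents before children; this is also
// the multiline value stored under the single znode when ZooKeeper storage is used.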
src/Common/Scheduler/Workload/WorkloadEntityStorageBase.h
Normal file
@ -0,0 +1,126 @@
#pragma once

#include <unordered_map>
#include <list>
#include <mutex>
#include <unordered_set>

#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Interpreters/Context_fwd.h>

#include <Parsers/IAST.h>

namespace DB
{

class WorkloadEntityStorageBase : public IWorkloadEntityStorage
{
public:
    explicit WorkloadEntityStorageBase(ContextPtr global_context_);
    ASTPtr get(const String & entity_name) const override;

    ASTPtr tryGet(const String & entity_name) const override;

    bool has(const String & entity_name) const override;

    std::vector<String> getAllEntityNames() const override;
    std::vector<String> getAllEntityNames(WorkloadEntityType entity_type) const override;

    std::vector<std::pair<String, ASTPtr>> getAllEntities() const override;

    bool empty() const override;

    bool storeEntity(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        ASTPtr create_entity_query,
        bool throw_if_exists,
        bool replace_if_exists,
        const Settings & settings) override;

    bool removeEntity(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        bool throw_if_not_exists) override;

    scope_guard getAllEntitiesAndSubscribe(
        const OnChangedHandler & handler) override;

protected:
    enum class OperationResult
    {
        Ok,
        Failed,
        Retry
    };

    virtual OperationResult storeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        ASTPtr create_entity_query,
        bool throw_if_exists,
        bool replace_if_exists,
        const Settings & settings) = 0;

    virtual OperationResult removeEntityImpl(
        const ContextPtr & current_context,
        WorkloadEntityType entity_type,
        const String & entity_name,
        bool throw_if_not_exists) = 0;

    std::unique_lock<std::recursive_mutex> getLock() const;

    /// Replaces current `entities` with `new_entities` and notifies subscribers.
    /// Note that subscribers will be notified with a sequence of events.
    /// It is guaranteed that all intermediate states (between every pair of consecutive events)
    /// will be consistent (all references between entities will be valid).
    void setAllEntities(const std::vector<std::pair<String, ASTPtr>> & new_entities);

    /// Serializes `entities` stored in memory plus one optional `change` into a multiline string
    String serializeAllEntities(std::optional<Event> change = {});

private:
    /// Changes the state in memory
    void applyEvent(std::unique_lock<std::recursive_mutex> & lock, const Event & event);

    /// Notifies subscribers about changes described by the vector of events `tx`
    void unlockAndNotify(std::unique_lock<std::recursive_mutex> & lock, std::vector<Event> tx);

    /// Returns true iff `references` has a path from `source` to `target`
    bool isIndirectlyReferenced(const String & target, const String & source);

    /// Adds references that are described by `entity` to `references`
    void insertReferences(const ASTPtr & entity);

    /// Removes references that are described by `entity` from `references`
    void removeReferences(const ASTPtr & entity);

    /// Returns an ordered vector of `entities`
    std::vector<Event> orderEntities(
        const std::unordered_map<String, ASTPtr> & all_entities,
        std::optional<Event> change = {});

    struct Handlers
    {
        std::mutex mutex;
        std::list<OnChangedHandler> list;
    };
    /// shared_ptr is here for safety because WorkloadEntityStorageBase can be destroyed before all subscriptions are removed.
    std::shared_ptr<Handlers> handlers;

    mutable std::recursive_mutex mutex;
    std::unordered_map<String, ASTPtr> entities; /// Maps an entity name to its CREATE entity query

    // Validation
    std::unordered_map<String, std::unordered_set<String>> references; /// Keeps track of references between entities. Key is the target, value is the set of sources
    String root_name; /// Current root workload name

protected:
    ContextPtr global_context;
    LoggerPtr log;
};

}
src/Common/Scheduler/Workload/createWorkloadEntityStorage.cpp
Normal file
@ -0,0 +1,45 @@
#include <Common/Scheduler/Workload/createWorkloadEntityStorage.h>
#include <Common/Scheduler/Workload/WorkloadEntityDiskStorage.h>
#include <Common/Scheduler/Workload/WorkloadEntityKeeperStorage.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <filesystem>
#include <memory>

namespace fs = std::filesystem;

namespace DB
{

namespace ErrorCodes
{
    extern const int INVALID_CONFIG_PARAMETER;
}

std::unique_ptr<IWorkloadEntityStorage> createWorkloadEntityStorage(const ContextMutablePtr & global_context)
{
    const String zookeeper_path_key = "workload_zookeeper_path";
    const String disk_path_key = "workload_path";

    const auto & config = global_context->getConfigRef();
    if (config.has(zookeeper_path_key))
    {
        if (config.has(disk_path_key))
        {
            throw Exception(
                ErrorCodes::INVALID_CONFIG_PARAMETER,
                "'{}' and '{}' must not be both specified in the config",
                zookeeper_path_key,
                disk_path_key);
        }
        return std::make_unique<WorkloadEntityKeeperStorage>(global_context, config.getString(zookeeper_path_key));
    }

    String default_path = fs::path{global_context->getPath()} / "workload" / "";
    String path = config.getString(disk_path_key, default_path);
    return std::make_unique<WorkloadEntityDiskStorage>(global_context, path);
}

}
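
// In short, the factory picks the backend as follows (summarized from the code above):
//   workload_zookeeper_path set, workload_path unset -> WorkloadEntityKeeperStorage
//   workload_path set (or neither key set)           -> WorkloadEntityDiskStorage
//                                                       (default: <server path>/workload/)
//   both keys set                                    -> INVALID_CONFIG_PARAMETER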
src/Common/Scheduler/Workload/createWorkloadEntityStorage.h
Normal file
@ -0,0 +1,11 @@
#pragma once

#include <Interpreters/Context_fwd.h>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>

namespace DB
{

std::unique_ptr<IWorkloadEntityStorage> createWorkloadEntityStorage(const ContextMutablePtr & global_context);

}
src/Common/Scheduler/createResourceManager.cpp
Normal file
@ -0,0 +1,104 @@
#include <Common/Scheduler/createResourceManager.h>
#include <Common/Scheduler/Nodes/CustomResourceManager.h>
#include <Common/Scheduler/Nodes/IOResourceManager.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>

#include <memory>
#include <vector>

namespace DB
{

namespace ErrorCodes
{
    extern const int RESOURCE_ACCESS_DENIED;
}

class ResourceManagerDispatcher : public IResourceManager
{
private:
    class Classifier : public IClassifier
    {
    public:
        void addClassifier(const ClassifierPtr & classifier)
        {
            classifiers.push_back(classifier);
        }

        bool has(const String & resource_name) override
        {
            for (const auto & classifier : classifiers)
            {
                if (classifier->has(resource_name))
                    return true;
            }
            return false;
        }

        ResourceLink get(const String & resource_name) override
        {
            for (auto & classifier : classifiers)
            {
                if (classifier->has(resource_name))
                    return classifier->get(resource_name);
            }
            throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Access denied to resource '{}'", resource_name);
        }
    private:
        std::vector<ClassifierPtr> classifiers; // Should be constant after initialization to avoid races
    };

public:
    void addManager(const ResourceManagerPtr & manager)
    {
        managers.push_back(manager);
    }

    void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override
    {
        for (auto & manager : managers)
            manager->updateConfiguration(config);
    }

    bool hasResource(const String & resource_name) const override
    {
        for (const auto & manager : managers)
        {
            if (manager->hasResource(resource_name))
                return true;
        }
        return false;
    }

    ClassifierPtr acquire(const String & workload_name) override
    {
        auto classifier = std::make_shared<Classifier>();
        for (const auto & manager : managers)
            classifier->addClassifier(manager->acquire(workload_name));
        return classifier;
    }

    void forEachNode(VisitorFunc visitor) override
    {
        for (const auto & manager : managers)
            manager->forEachNode(visitor);
    }

private:
    std::vector<ResourceManagerPtr> managers; // Should be constant after initialization to avoid races
};

ResourceManagerPtr createResourceManager(const ContextMutablePtr & global_context)
{
    auto dispatcher = std::make_shared<ResourceManagerDispatcher>();

    // NOTE: if the same resource is described by both managers, then the manager added earlier will be used.
    dispatcher->addManager(std::make_shared<CustomResourceManager>());
    dispatcher->addManager(std::make_shared<IOResourceManager>(global_context->getWorkloadEntityStorage()));

    return dispatcher;
}

}
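
// Illustration only (not part of the diff): resolving a resource link through the
// dispatcher above. The workload and resource names are hypothetical.
//
//     ResourceManagerPtr manager = createResourceManager(global_context);
//     ClassifierPtr classifier = manager->acquire("production"); // combines classifiers from all managers
//     if (classifier->has("io_read"))
//     {
//         ResourceLink link = classifier->get("io_read"); // served by the first manager that knows the resource
//         // a real caller would attach `link` to the query's read settings for scheduling
//     }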
src/Common/Scheduler/createResourceManager.h
Normal file
@ -0,0 +1,11 @@
#pragma once

#include <Interpreters/Context_fwd.h>
#include <Common/Scheduler/IResourceManager.h>

namespace DB
{

ResourceManagerPtr createResourceManager(const ContextMutablePtr & global_context);

}
@ -18,7 +18,8 @@
#include <Disks/FakeDiskTransaction.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Interpreters/Context.h>

#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Parsers/ASTCreateResourceQuery.h>

namespace DB
{
@ -71,8 +72,8 @@ DiskObjectStorage::DiskObjectStorage(
    , metadata_storage(std::move(metadata_storage_))
    , object_storage(std::move(object_storage_))
    , send_metadata(config.getBool(config_prefix + ".send_metadata", false))
-   , read_resource_name(config.getString(config_prefix + ".read_resource", ""))
-   , write_resource_name(config.getString(config_prefix + ".write_resource", ""))
+   , read_resource_name_from_config(config.getString(config_prefix + ".read_resource", ""))
+   , write_resource_name_from_config(config.getString(config_prefix + ".write_resource", ""))
    , metadata_helper(std::make_unique<DiskObjectStorageRemoteMetadataRestoreHelper>(this, ReadSettings{}, WriteSettings{}))
{
    data_source_description = DataSourceDescription{
@ -83,6 +84,98 @@ DiskObjectStorage::DiskObjectStorage(
        .is_encrypted = false,
        .is_cached = object_storage->supportsCache(),
    };
    resource_changes_subscription = Context::getGlobalContextInstance()->getWorkloadEntityStorage().getAllEntitiesAndSubscribe(
        [this] (const std::vector<IWorkloadEntityStorage::Event> & events)
        {
            std::unique_lock lock{resource_mutex};

            // Sets of matching resource names. Required to resolve possible conflicts in a deterministic way
            std::set<String> new_read_resource_name_from_sql;
            std::set<String> new_write_resource_name_from_sql;
            std::set<String> new_read_resource_name_from_sql_any;
            std::set<String> new_write_resource_name_from_sql_any;

            // Current state
            if (!read_resource_name_from_sql.empty())
                new_read_resource_name_from_sql.insert(read_resource_name_from_sql);
            if (!write_resource_name_from_sql.empty())
                new_write_resource_name_from_sql.insert(write_resource_name_from_sql);
            if (!read_resource_name_from_sql_any.empty())
                new_read_resource_name_from_sql_any.insert(read_resource_name_from_sql_any);
            if (!write_resource_name_from_sql_any.empty())
                new_write_resource_name_from_sql_any.insert(write_resource_name_from_sql_any);

            // Process all updates in the specified order
            for (const auto & [entity_type, resource_name, resource] : events)
            {
                if (entity_type == WorkloadEntityType::Resource)
                {
                    if (resource) // CREATE RESOURCE
                    {
                        auto * create = typeid_cast<ASTCreateResourceQuery *>(resource.get());
                        chassert(create);
                        for (const auto & [mode, disk] : create->operations)
                        {
                            if (!disk)
                            {
                                switch (mode)
                                {
                                    case ASTCreateResourceQuery::AccessMode::Read: new_read_resource_name_from_sql_any.insert(resource_name); break;
                                    case ASTCreateResourceQuery::AccessMode::Write: new_write_resource_name_from_sql_any.insert(resource_name); break;
                                }
                            }
                            else if (*disk == name)
                            {
                                switch (mode)
                                {
                                    case ASTCreateResourceQuery::AccessMode::Read: new_read_resource_name_from_sql.insert(resource_name); break;
                                    case ASTCreateResourceQuery::AccessMode::Write: new_write_resource_name_from_sql.insert(resource_name); break;
                                }
                            }
                        }
                    }
                    else // DROP RESOURCE
                    {
                        new_read_resource_name_from_sql.erase(resource_name);
                        new_write_resource_name_from_sql.erase(resource_name);
                        new_read_resource_name_from_sql_any.erase(resource_name);
                        new_write_resource_name_from_sql_any.erase(resource_name);
                    }
                }
            }

            String old_read_resource = getReadResourceNameNoLock();
            String old_write_resource = getWriteResourceNameNoLock();

            // Apply changes
            if (!new_read_resource_name_from_sql_any.empty())
                read_resource_name_from_sql_any = *new_read_resource_name_from_sql_any.begin();
            else
                read_resource_name_from_sql_any.clear();

            if (!new_write_resource_name_from_sql_any.empty())
                write_resource_name_from_sql_any = *new_write_resource_name_from_sql_any.begin();
            else
                write_resource_name_from_sql_any.clear();

            if (!new_read_resource_name_from_sql.empty())
                read_resource_name_from_sql = *new_read_resource_name_from_sql.begin();
            else
                read_resource_name_from_sql.clear();

            if (!new_write_resource_name_from_sql.empty())
                write_resource_name_from_sql = *new_write_resource_name_from_sql.begin();
            else
                write_resource_name_from_sql.clear();

            String new_read_resource = getReadResourceNameNoLock();
            String new_write_resource = getWriteResourceNameNoLock();

            if (old_read_resource != new_read_resource)
                LOG_INFO(log, "Using resource '{}' instead of '{}' for READ", new_read_resource, old_read_resource);
            if (old_write_resource != new_write_resource)
                LOG_INFO(log, "Using resource '{}' instead of '{}' for WRITE", new_write_resource, old_write_resource);
        });
}

StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) const
@ -480,13 +573,29 @@ static inline Settings updateIOSchedulingSettings(const Settings & settings, con
String DiskObjectStorage::getReadResourceName() const
{
    std::unique_lock lock(resource_mutex);
-   return read_resource_name;
+   return getReadResourceNameNoLock();
}

String DiskObjectStorage::getWriteResourceName() const
{
    std::unique_lock lock(resource_mutex);
-   return write_resource_name;
+   return getWriteResourceNameNoLock();
}

String DiskObjectStorage::getReadResourceNameNoLock() const
{
    if (read_resource_name_from_config.empty())
        return read_resource_name_from_sql.empty() ? read_resource_name_from_sql_any : read_resource_name_from_sql;
    else
        return read_resource_name_from_config;
}

String DiskObjectStorage::getWriteResourceNameNoLock() const
{
    if (write_resource_name_from_config.empty())
        return write_resource_name_from_sql.empty() ? write_resource_name_from_sql_any : write_resource_name_from_sql;
    else
        return write_resource_name_from_config;
}
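
// The two NoLock getters encode a fixed precedence: an explicit name from the disk
// config wins, then a resource created with a DISK <name> clause for this disk,
// then an ANY DISK resource. A quick illustration (values are hypothetical):
//   read_resource_name_from_config  = ""            (nothing in config.xml)
//   read_resource_name_from_sql     = "disk_read"   (CREATE RESOURCE ... READ DISK <this disk>)
//   read_resource_name_from_sql_any = "any_read"    (CREATE RESOURCE ... READ ANY DISK)
//   => getReadResourceNameNoLock() returns "disk_read"; it would return "any_read"
//      only if the specific name were empty, and a config value would override both.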

std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
@ -607,10 +716,10 @@ void DiskObjectStorage::applyNewSettings(

    {
        std::unique_lock lock(resource_mutex);
-       if (String new_read_resource_name = config.getString(config_prefix + ".read_resource", ""); new_read_resource_name != read_resource_name)
-           read_resource_name = new_read_resource_name;
-       if (String new_write_resource_name = config.getString(config_prefix + ".write_resource", ""); new_write_resource_name != write_resource_name)
-           write_resource_name = new_write_resource_name;
+       if (String new_read_resource_name = config.getString(config_prefix + ".read_resource", ""); new_read_resource_name != read_resource_name_from_config)
+           read_resource_name_from_config = new_read_resource_name;
+       if (String new_write_resource_name = config.getString(config_prefix + ".write_resource", ""); new_write_resource_name != write_resource_name_from_config)
+           write_resource_name_from_config = new_write_resource_name;
    }

    IDisk::applyNewSettings(config, context_, config_prefix, disk_map);
@ -6,6 +6,8 @@
|
||||
#include <Disks/ObjectStorages/IMetadataStorage.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
|
||||
@ -228,6 +230,8 @@ private:
|
||||
|
||||
String getReadResourceName() const;
|
||||
String getWriteResourceName() const;
|
||||
String getReadResourceNameNoLock() const;
|
||||
String getWriteResourceNameNoLock() const;
|
||||
|
||||
const String object_key_prefix;
|
||||
LoggerPtr log;
|
||||
@ -246,8 +250,13 @@ private:
|
||||
const bool send_metadata;
|
||||
|
||||
mutable std::mutex resource_mutex;
|
||||
String read_resource_name;
|
||||
String write_resource_name;
|
||||
String read_resource_name_from_config; // specified in disk config.xml read_resource element
|
||||
String write_resource_name_from_config; // specified in disk config.xml write_resource element
|
||||
String read_resource_name_from_sql; // described by CREATE RESOURCE query with READ DISK clause
|
||||
String write_resource_name_from_sql; // described by CREATE RESOURCE query with WRITE DISK clause
|
||||
String read_resource_name_from_sql_any; // described by CREATE RESOURCE query with READ ANY DISK clause
|
||||
String write_resource_name_from_sql_any; // described by CREATE RESOURCE query with WRITE ANY DISK clause
|
||||
scope_guard resource_changes_subscription;
|
||||
|
||||
std::unique_ptr<DiskObjectStorageRemoteMetadataRestoreHelper> metadata_helper;
|
||||
};
|
||||
|
@ -2,7 +2,9 @@

#include <filesystem>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <shared_mutex>
#include <base/defines.h>
#include <Common/SharedLockGuard.h>
@ -25,10 +27,19 @@ struct InMemoryDirectoryPathMap
        return path1 < path2;
    }
};

using FileNames = std::set<std::string>;
using FileNamesIterator = FileNames::iterator;
struct FileNameIteratorComparator
{
    bool operator()(const FileNames::iterator & lhs, const FileNames::iterator & rhs) const { return *lhs < *rhs; }
};

struct RemotePathInfo
{
    std::string path;
    time_t last_modified = 0;
    std::set<FileNamesIterator, FileNameIteratorComparator> filename_iterators;
};

using Map = std::map<std::filesystem::path, RemotePathInfo, PathComparator>;
@ -49,9 +60,11 @@ struct InMemoryDirectoryPathMap
    mutable SharedMutex mutex;

#ifdef OS_LINUX
    FileNames TSA_GUARDED_BY(mutex) unique_filenames;
    Map TSA_GUARDED_BY(mutex) map;
/// std::shared_mutex may not be annotated with the 'capability' attribute in libcxx.
#else
    FileNames unique_filenames;
    Map map;
#endif
};
|
@ -220,6 +220,21 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std
|
||||
}
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::createEmptyMetadataFile(const std::string & path)
|
||||
{
|
||||
if (metadata_storage.object_storage->isWriteOnce())
|
||||
return;
|
||||
|
||||
addOperation(
|
||||
std::make_unique<MetadataStorageFromPlainObjectStorageWriteFileOperation>(path, *metadata_storage.getPathMap(), object_storage));
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::createMetadataFile(
|
||||
const std::string & path, ObjectStorageKey /*object_key*/, uint64_t /* size_in_bytes */)
|
||||
{
|
||||
createEmptyMetadataFile(path);
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std::string & path)
|
||||
{
|
||||
if (metadata_storage.object_storage->isWriteOnce())
|
||||
@ -252,12 +267,6 @@ void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::
|
||||
metadata_storage.getMetadataKeyPrefix()));
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata(
|
||||
const std::string &, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */)
|
||||
{
|
||||
/// Noop, local metadata files is only one file, it is the metadata file itself.
|
||||
}
|
||||
|
||||
UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromPlainObjectStorageTransaction::unlinkMetadata(const std::string & path)
|
||||
{
|
||||
/// The record has become stale, remove it from cache.
|
||||
@ -269,8 +278,11 @@ UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromPlainObjectStorageTrans
|
||||
metadata_storage.object_metadata_cache->remove(hash.get128());
|
||||
}
|
||||
|
||||
/// No hardlinks, so will always remove file.
|
||||
return std::make_shared<UnlinkMetadataFileOperationOutcome>(UnlinkMetadataFileOperationOutcome{0});
|
||||
auto result = std::make_shared<UnlinkMetadataFileOperationOutcome>(UnlinkMetadataFileOperationOutcome{0});
|
||||
if (!metadata_storage.object_storage->isWriteOnce())
|
||||
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation>(
|
||||
path, *metadata_storage.getPathMap(), object_storage));
|
||||
return result;
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::commit()
|
||||
|
@ -114,22 +114,19 @@ public:

    const IMetadataStorage & getStorageForNonTransactionalReads() const override;

-   void addBlobToMetadata(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) override;
+   void addBlobToMetadata(const std::string & /* path */, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) override
+   {
+       // Noop
+   }

    void setLastModified(const String &, const Poco::Timestamp &) override
    {
        /// Noop
    }

-   void createEmptyMetadataFile(const std::string & /* path */) override
-   {
-       /// No metadata, no need to create anything.
-   }
+   void createEmptyMetadataFile(const std::string & /* path */) override;

-   void createMetadataFile(const std::string & /* path */, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) override
-   {
-       /// Noop
-   }
+   void createMetadataFile(const std::string & /* path */, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) override;

    void createDirectory(const std::string & path) override;

@ -1,6 +1,8 @@
#include "MetadataStorageFromPlainObjectStorageOperations.h"
#include <Disks/ObjectStorages/InMemoryDirectoryPathMap.h>

#include <filesystem>
#include <mutex>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Poco/Timestamp.h>
@ -76,7 +78,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
    std::lock_guard lock(path_map.mutex);
    auto & map = path_map.map;
    [[maybe_unused]] auto result
-       = map.emplace(base_path, InMemoryDirectoryPathMap::RemotePathInfo{object_key_prefix, Poco::Timestamp{}.epochTime()});
+       = map.emplace(base_path, InMemoryDirectoryPathMap::RemotePathInfo{object_key_prefix, Poco::Timestamp{}.epochTime(), {}});
    chassert(result.second);
}
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
@ -287,4 +289,122 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un
    CurrentMetrics::add(metric, 1);
}

MetadataStorageFromPlainObjectStorageWriteFileOperation::MetadataStorageFromPlainObjectStorageWriteFileOperation(
    const std::string & path_, InMemoryDirectoryPathMap & path_map_, ObjectStoragePtr object_storage_)
    : path(path_), path_map(path_map_), object_storage(object_storage_)
{
}

void MetadataStorageFromPlainObjectStorageWriteFileOperation::execute(std::unique_lock<SharedMutex> &)
{
    LOG_TEST(getLogger("MetadataStorageFromPlainObjectStorageWriteFileOperation"), "Creating metadata for a file '{}'", path);

    std::lock_guard lock(path_map.mutex);

    auto it = path_map.map.find(path.parent_path());
    /// Some paths (e.g., clickhouse_access_check) may not have parent directories.
    if (it == path_map.map.end())
        LOG_TRACE(
            getLogger("MetadataStorageFromPlainObjectStorageWriteFileOperation"),
            "Parent directory does not exist, skipping path {}",
            path);
    else
    {
        auto [filename_it, inserted] = path_map.unique_filenames.emplace(path.filename());
        if (inserted)
        {
            auto metric = object_storage->getMetadataStorageMetrics().unique_filenames_count;
            CurrentMetrics::add(metric, 1);
        }
        written = it->second.filename_iterators.emplace(filename_it).second;
        if (written)
        {
            auto metric = object_storage->getMetadataStorageMetrics().file_count;
            CurrentMetrics::add(metric, 1);
        }
    }
}

void MetadataStorageFromPlainObjectStorageWriteFileOperation::undo(std::unique_lock<SharedMutex> &)
{
    if (written)
    {
        std::lock_guard lock(path_map.mutex);
        auto it = path_map.map.find(path.parent_path());
        chassert(it != path_map.map.end());
        if (it != path_map.map.end())
        {
            auto filename_it = path_map.unique_filenames.find(path.filename());
            if (filename_it != path_map.unique_filenames.end())
            {
                if (it->second.filename_iterators.erase(filename_it) > 0)
                {
                    auto metric = object_storage->getMetadataStorageMetrics().file_count;
                    CurrentMetrics::sub(metric, 1);
                }
            }
        }
    }
}

MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation::MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation(
    std::filesystem::path && path_, InMemoryDirectoryPathMap & path_map_, ObjectStoragePtr object_storage_)
    : path(path_)
    , remote_path(std::filesystem::path(object_storage_->generateObjectKeyForPath(path_, std::nullopt).serialize()))
    , path_map(path_map_)
    , object_storage(object_storage_)
{
}

void MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation::execute(std::unique_lock<SharedMutex> &)
{
    LOG_TEST(
        getLogger("MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation"),
        "Unlinking metadata for a write '{}' with remote path '{}'",
        path,
        remote_path);

    std::lock_guard lock(path_map.mutex);
    auto it = path_map.map.find(path.parent_path());
    if (it == path_map.map.end())
        LOG_TRACE(
            getLogger("MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation"),
            "Parent directory does not exist, skipping path {}",
            path);
    else
    {
        auto & filename_iterators = it->second.filename_iterators;
        auto filename_it = path_map.unique_filenames.find(path.filename());
        if (filename_it != path_map.unique_filenames.end())
            unlinked = (filename_iterators.erase(filename_it) > 0);

        if (unlinked)
        {
            auto metric = object_storage->getMetadataStorageMetrics().file_count;
            CurrentMetrics::sub(metric, 1);
        }
    }
}

void MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation::undo(std::unique_lock<SharedMutex> &)
{
    if (unlinked)
    {
        std::lock_guard lock(path_map.mutex);
        auto it = path_map.map.find(path.parent_path());
        chassert(it != path_map.map.end());
        if (it != path_map.map.end())
        {
            auto filename_it = path_map.unique_filenames.find(path.filename());
            if (filename_it != path_map.unique_filenames.end())
            {
                if (it->second.filename_iterators.emplace(filename_it).second)
                {
                    auto metric = object_storage->getMetadataStorageMetrics().file_count;
                    CurrentMetrics::add(metric, 1);
                }
            }
        }
    }
}
}
@ -87,4 +87,38 @@ public:
    void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
};

class MetadataStorageFromPlainObjectStorageWriteFileOperation final : public IMetadataOperation
{
private:
    std::filesystem::path path;
    InMemoryDirectoryPathMap & path_map;
    ObjectStoragePtr object_storage;

    bool written = false;

public:
    MetadataStorageFromPlainObjectStorageWriteFileOperation(
        const std::string & path, InMemoryDirectoryPathMap & path_map_, ObjectStoragePtr object_storage_);

    void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
    void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
};

class MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation final : public IMetadataOperation
{
private:
    std::filesystem::path path;
    std::filesystem::path remote_path;
    InMemoryDirectoryPathMap & path_map;
    ObjectStoragePtr object_storage;

    bool unlinked = false;

public:
    MetadataStorageFromPlainObjectStorageUnlinkMetadataFileOperation(
        std::filesystem::path && path_, InMemoryDirectoryPathMap & path_map_, ObjectStoragePtr object_storage_);

    void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
    void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
};
}
@ -3,18 +3,24 @@
#include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h>
#include <Disks/ObjectStorages/ObjectStorageIterator.h>

#include <algorithm>
#include <any>
#include <cstddef>
#include <exception>
#include <iterator>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <IO/ReadHelpers.h>
#include <IO/S3Common.h>
#include <IO/SharedThreadPools.h>
#include <Poco/Timestamp.h>
-#include "Common/Exception.h"
+#include <Common/Exception.h>
#include <Common/SharedLockGuard.h>
#include <Common/SharedMutex.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include "CommonPathPrefixKeyGenerator.h"

@ -45,6 +51,61 @@ std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage)
        : metadata_key_prefix;
}

void loadDirectoryTree(
    InMemoryDirectoryPathMap::Map & map, InMemoryDirectoryPathMap::FileNames & unique_filenames, ObjectStoragePtr object_storage)
{
    using FileNamesIterator = InMemoryDirectoryPathMap::FileNamesIterator;
    using FileNameIteratorComparator = InMemoryDirectoryPathMap::FileNameIteratorComparator;
    const auto common_key_prefix = object_storage->getCommonKeyPrefix();
    ThreadPool & pool = getIOThreadPool().get();
    ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWTreeLoad");

    std::atomic<size_t> num_files = 0;
    LOG_DEBUG(getLogger("MetadataStorageFromPlainObjectStorage"), "Loading directory tree");
    std::mutex mutex;
    for (auto & item : map)
    {
        auto & remote_path_info = item.second;
        const auto remote_path = std::filesystem::path(common_key_prefix) / remote_path_info.path / "";
        runner(
            [remote_path, &mutex, &remote_path_info, &unique_filenames, &object_storage, &num_files]
            {
                setThreadName("PlainRWTreeLoad");
                std::set<FileNamesIterator, FileNameIteratorComparator> filename_iterators;
                for (auto iterator = object_storage->iterate(remote_path, 0); iterator->isValid(); iterator->next())
                {
                    auto file = iterator->current();
                    String path = file->getPath();
                    chassert(path.starts_with(remote_path.string()));
                    auto filename = std::filesystem::path(path).filename();
                    /// Check that the file is a direct child.
                    if (path.substr(remote_path.string().size()) == filename)
                    {
                        auto filename_it = unique_filenames.end();
                        {
                            std::lock_guard lock(mutex);
                            filename_it = unique_filenames.emplace(filename).first;
                        }
                        auto inserted = filename_iterators.emplace(filename_it).second;
                        chassert(inserted);
                        if (inserted)
                            ++num_files;
                    }
                }

                auto metric = object_storage->getMetadataStorageMetrics().file_count;
                CurrentMetrics::add(metric, filename_iterators.size());
                remote_path_info.filename_iterators = std::move(filename_iterators);
            });
    }
    runner.waitForAllToFinishAndRethrowFirstError();
    LOG_DEBUG(
        getLogger("MetadataStorageFromPlainObjectStorage"),
        "Loaded directory tree for {} directories, found {} files",
        map.size(),
        num_files);
}
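
/// The fan-out pattern above, reduced to its skeleton (illustration only; `work_items`
/// and `process` are hypothetical, the pool helpers are the ones used in this file):
///
///     ThreadPool & pool = getIOThreadPool().get();
///     ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWTreeLoad");
///     for (auto & item : work_items)
///         runner([&item] { process(item); });          // schedule one task per directory
///     runner.waitForAllToFinishAndRethrowFirstError(); // join workers, propagate the first error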

std::shared_ptr<InMemoryDirectoryPathMap> loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage)
{
    auto result = std::make_shared<InMemoryDirectoryPathMap>();
@ -62,6 +123,9 @@ std::shared_ptr<InMemoryDirectoryPathMap> loadPathPrefixMap(const std::string &

    LOG_DEBUG(log, "Loading metadata");
    size_t num_files = 0;

+   std::mutex mutex;
+   InMemoryDirectoryPathMap::Map map;
    for (auto iterator = object_storage->iterate(metadata_key_prefix, 0); iterator->isValid(); iterator->next())
    {
        ++num_files;
@ -72,7 +136,7 @@ std::shared_ptr<InMemoryDirectoryPathMap> loadPathPrefixMap(const std::string &
            continue;

        runner(
-           [remote_metadata_path, path, &object_storage, &result, &log, &settings, &metadata_key_prefix]
+           [remote_metadata_path, path, &object_storage, &mutex, &map, &log, &settings, &metadata_key_prefix]
            {
                setThreadName("PlainRWMetaLoad");

@ -109,13 +173,13 @@ std::shared_ptr<InMemoryDirectoryPathMap> loadPathPrefixMap(const std::string &
                chassert(remote_metadata_path.has_parent_path());
                chassert(remote_metadata_path.string().starts_with(metadata_key_prefix));
                auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size());
-               auto remote_path = std::filesystem::path(std::move(suffix));
+               auto rel_path = std::filesystem::path(std::move(suffix));
                std::pair<Map::iterator, bool> res;
                {
-                   std::lock_guard lock(result->mutex);
-                   res = result->map.emplace(
+                   std::lock_guard lock(mutex);
+                   res = map.emplace(
                        std::filesystem::path(local_path).parent_path(),
-                       InMemoryDirectoryPathMap::RemotePathInfo{remote_path.parent_path(), last_modified.epochTime()});
+                       InMemoryDirectoryPathMap::RemotePathInfo{rel_path.parent_path(), last_modified.epochTime(), {}});
                }

                /// This can happen if table replication is enabled, then the same local path is written
@ -126,14 +190,19 @@ std::shared_ptr<InMemoryDirectoryPathMap> loadPathPrefixMap(const std::string &
                    "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
                    local_path,
                    res.first->second.path,
-                   remote_path.parent_path().string());
+                   rel_path.parent_path().string());
            });
    }

    runner.waitForAllToFinishAndRethrowFirstError();

+   InMemoryDirectoryPathMap::FileNames unique_filenames;
+   LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, map.size());
+   loadDirectoryTree(map, unique_filenames, object_storage);
    {
-       SharedLockGuard lock(result->mutex);
-       LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result->map.size());
+       std::lock_guard lock(result->mutex);
+       result->map = std::move(map);
+       result->unique_filenames = std::move(unique_filenames);

        auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
        CurrentMetrics::add(metric, result->map.size());
@ -141,55 +210,6 @@ std::shared_ptr<InMemoryDirectoryPathMap> loadPathPrefixMap(const std::string &
    return result;
}
- void getDirectChildrenOnDiskImpl(
-     const std::string & storage_key,
-     const RelativePathsWithMetadata & remote_paths,
-     const std::string & local_path,
-     const InMemoryDirectoryPathMap & path_map,
-     std::unordered_set<std::string> & result)
- {
-     /// Directories are retrieved from the in-memory path map.
-     {
-         SharedLockGuard lock(path_map.mutex);
-         const auto & local_path_prefixes = path_map.map;
-         const auto end_it = local_path_prefixes.end();
-         for (auto it = local_path_prefixes.lower_bound(local_path); it != end_it; ++it)
-         {
-             const auto & [k, _] = std::make_tuple(it->first.string(), it->second);
-             if (!k.starts_with(local_path))
-                 break;
-
-             auto slash_num = count(k.begin() + local_path.size(), k.end(), '/');
-             /// The local_path_prefixes comparator ensures that the paths with the smallest number of
-             /// hops from the local_path are iterated first. The paths do not end with '/', hence
-             /// break the loop if the number of slashes is greater than 0.
-             if (slash_num != 0)
-                 break;
-
-             result.emplace(std::string(k.begin() + local_path.size(), k.end()) + "/");
-         }
-     }
-
-     /// Files.
-     auto skip_list = std::set<std::string>{PREFIX_PATH_FILE_NAME};
-     for (const auto & elem : remote_paths)
-     {
-         const auto & path = elem->relative_path;
-         chassert(path.find(storage_key) == 0);
-         const auto child_pos = storage_key.size();
-
-         auto slash_pos = path.find('/', child_pos);
-
-         if (slash_pos == std::string::npos)
-         {
-             /// File names.
-             auto filename = path.substr(child_pos);
-             if (!skip_list.contains(filename))
-                 result.emplace(std::move(filename));
-         }
-     }
- }

}

MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage(
@ -215,6 +235,9 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita
        auto keys_gen = std::make_shared<CommonPathPrefixKeyGenerator>(object_storage->getCommonKeyPrefix(), path_map);
        object_storage->setKeysGenerator(keys_gen);
    }

+   auto metric = object_storage->getMetadataStorageMetrics().unique_filenames_count;
+   CurrentMetrics::add(metric, path_map->unique_filenames.size());
}

MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewritableObjectStorage()
@ -246,17 +269,8 @@ bool MetadataStorageFromPlainRewritableObjectStorage::existsDirectory(const std:

std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const
{
-   auto key_prefix = object_storage->generateObjectKeyForPath(path, "" /* key_prefix */).serialize();
-
-   RelativePathsWithMetadata files;
-   auto absolute_key = std::filesystem::path(object_storage->getCommonKeyPrefix()) / key_prefix / "";
-
-   object_storage->listObjects(absolute_key, files, 0);
-
-   std::unordered_set<std::string> directories;
-   getDirectChildrenOnDisk(absolute_key, files, std::filesystem::path(path) / "", directories);
-
-   return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
+   std::unordered_set<std::string> result = getDirectChildrenOnDisk(std::filesystem::path(path) / "");
+   return std::vector<std::string>(std::make_move_iterator(result.begin()), std::make_move_iterator(result.end()));
}

std::optional<Poco::Timestamp> MetadataStorageFromPlainRewritableObjectStorage::getLastModifiedIfExists(const String & path) const
@ -271,13 +285,41 @@ std::optional<Poco::Timestamp> MetadataStorageFromPlainRewritableObjectStorage::
    return std::nullopt;
}

-void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
-    const std::string & storage_key,
-    const RelativePathsWithMetadata & remote_paths,
-    const std::string & local_path,
-    std::unordered_set<std::string> & result) const
-{
-    getDirectChildrenOnDiskImpl(storage_key, remote_paths, local_path, *getPathMap(), result);
-}
+std::unordered_set<std::string>
+MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(const std::filesystem::path & local_path) const
+{
+    std::unordered_set<std::string> result;
+    SharedLockGuard lock(path_map->mutex);
+    const auto end_it = path_map->map.end();
+    /// Directories.
+    for (auto it = path_map->map.lower_bound(local_path); it != end_it; ++it)
+    {
+        const auto & subdirectory = it->first.string();
+        if (!subdirectory.starts_with(local_path.string()))
+            break;
+
+        auto slash_num = count(subdirectory.begin() + local_path.string().size(), subdirectory.end(), '/');
+        /// The directory map comparator ensures that the paths with the smallest number of
+        /// hops from the local_path are iterated first. The paths do not end with '/', hence
+        /// break the loop if the number of slashes to the right of the offset is greater than 0.
+        if (slash_num != 0)
+            break;
+
+        result.emplace(std::string(subdirectory.begin() + local_path.string().size(), subdirectory.end()) + "/");
+    }
+
+    /// Files.
+    auto it = path_map->map.find(local_path.parent_path());
+    if (it != path_map->map.end())
+    {
+        for (const auto & filename_it : it->second.filename_iterators)
+        {
+            chassert(filename_it != path_map->unique_filenames.end());
+            result.insert(*filename_it);
+        }
+    }
+
+    return result;
+}

bool MetadataStorageFromPlainRewritableObjectStorage::useSeparateLayoutForMetadata() const
@ -35,11 +35,7 @@
protected:
    std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; }
    std::shared_ptr<InMemoryDirectoryPathMap> getPathMap() const override { return path_map; }
-   void getDirectChildrenOnDisk(
-       const std::string & storage_key,
-       const RelativePathsWithMetadata & remote_paths,
-       const std::string & local_path,
-       std::unordered_set<std::string> & result) const;
+   std::unordered_set<std::string> getDirectChildrenOnDisk(const std::filesystem::path & local_path) const;

private:
    bool useSeparateLayoutForMetadata() const;
@ -13,6 +13,8 @@ struct MetadataStorageMetrics
    const ProfileEvents::Event directory_removed = ProfileEvents::end();

    CurrentMetrics::Metric directory_map_size = CurrentMetrics::end();
+   CurrentMetrics::Metric unique_filenames_count = CurrentMetrics::end();
+   CurrentMetrics::Metric file_count = CurrentMetrics::end();

    template <typename ObjectStorage, MetadataStorageType metadata_type>
    static MetadataStorageMetrics create()
@ -24,8 +24,14 @@ extern const Event DiskPlainRewritableS3DirectoryRemoved;
namespace CurrentMetrics
{
extern const Metric DiskPlainRewritableAzureDirectoryMapSize;
+extern const Metric DiskPlainRewritableAzureUniqueFileNamesCount;
+extern const Metric DiskPlainRewritableAzureFileCount;
extern const Metric DiskPlainRewritableLocalDirectoryMapSize;
+extern const Metric DiskPlainRewritableLocalUniqueFileNamesCount;
+extern const Metric DiskPlainRewritableLocalFileCount;
extern const Metric DiskPlainRewritableS3DirectoryMapSize;
+extern const Metric DiskPlainRewritableS3UniqueFileNamesCount;
+extern const Metric DiskPlainRewritableS3FileCount;
}

namespace DB
@ -38,7 +44,9 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create<S3ObjectStorage, Me
    return MetadataStorageMetrics{
        .directory_created = ProfileEvents::DiskPlainRewritableS3DirectoryCreated,
        .directory_removed = ProfileEvents::DiskPlainRewritableS3DirectoryRemoved,
-       .directory_map_size = CurrentMetrics::DiskPlainRewritableS3DirectoryMapSize};
+       .directory_map_size = CurrentMetrics::DiskPlainRewritableS3DirectoryMapSize,
+       .unique_filenames_count = CurrentMetrics::DiskPlainRewritableS3UniqueFileNamesCount,
+       .file_count = CurrentMetrics::DiskPlainRewritableS3FileCount};
}
#endif

@ -49,7 +57,9 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create<AzureObjectStorage,
    return MetadataStorageMetrics{
        .directory_created = ProfileEvents::DiskPlainRewritableAzureDirectoryCreated,
        .directory_removed = ProfileEvents::DiskPlainRewritableAzureDirectoryRemoved,
-       .directory_map_size = CurrentMetrics::DiskPlainRewritableAzureDirectoryMapSize};
+       .directory_map_size = CurrentMetrics::DiskPlainRewritableAzureDirectoryMapSize,
+       .unique_filenames_count = CurrentMetrics::DiskPlainRewritableAzureUniqueFileNamesCount,
+       .file_count = CurrentMetrics::DiskPlainRewritableAzureFileCount};
}
#endif

@ -59,7 +69,9 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create<LocalObjectStorage,
    return MetadataStorageMetrics{
        .directory_created = ProfileEvents::DiskPlainRewritableLocalDirectoryCreated,
        .directory_removed = ProfileEvents::DiskPlainRewritableLocalDirectoryRemoved,
-       .directory_map_size = CurrentMetrics::DiskPlainRewritableLocalDirectoryMapSize};
+       .directory_map_size = CurrentMetrics::DiskPlainRewritableLocalDirectoryMapSize,
+       .unique_filenames_count = CurrentMetrics::DiskPlainRewritableLocalUniqueFileNamesCount,
+       .file_count = CurrentMetrics::DiskPlainRewritableLocalFileCount};
}

}
@ -67,7 +67,6 @@
#include <Access/SettingsConstraintsAndProfileIDs.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/GSSAcceptor.h>
#include <Common/Scheduler/ResourceManagerFactory.h>
#include <Backups/BackupsWorker.h>
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
#include <Interpreters/EmbeddedDictionaries.h>
@ -92,6 +91,8 @@
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/Scheduler/createResourceManager.h>
#include <Common/Scheduler/Workload/createWorkloadEntityStorage.h>
#include <Common/StackTrace.h>
#include <Common/Config/ConfigHelper.h>
#include <Common/Config/ConfigProcessor.h>
@ -370,6 +371,9 @@ struct ContextSharedPart : boost::noncopyable
    mutable OnceFlag user_defined_sql_objects_storage_initialized;
    mutable std::unique_ptr<IUserDefinedSQLObjectsStorage> user_defined_sql_objects_storage;

    mutable OnceFlag workload_entity_storage_initialized;
    mutable std::unique_ptr<IWorkloadEntityStorage> workload_entity_storage;

#if USE_NLP
    mutable OnceFlag synonyms_extensions_initialized;
    mutable std::optional<SynonymsExtensions> synonyms_extensions;
@ -711,6 +715,7 @@ struct ContextSharedPart : boost::noncopyable
        SHUTDOWN(log, "dictionaries loader", external_dictionaries_loader, enablePeriodicUpdates(false));
        SHUTDOWN(log, "UDFs loader", external_user_defined_executable_functions_loader, enablePeriodicUpdates(false));
        SHUTDOWN(log, "another UDFs storage", user_defined_sql_objects_storage, stopWatching());
        SHUTDOWN(log, "workload entity storage", workload_entity_storage, stopWatching());

        LOG_TRACE(log, "Shutting down named sessions");
        Session::shutdownNamedSessions();
@ -742,6 +747,7 @@ struct ContextSharedPart : boost::noncopyable
    std::unique_ptr<ExternalDictionariesLoader> delete_external_dictionaries_loader;
    std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> delete_external_user_defined_executable_functions_loader;
    std::unique_ptr<IUserDefinedSQLObjectsStorage> delete_user_defined_sql_objects_storage;
    std::unique_ptr<IWorkloadEntityStorage> delete_workload_entity_storage;
    std::unique_ptr<BackgroundSchedulePool> delete_buffer_flush_schedule_pool;
    std::unique_ptr<BackgroundSchedulePool> delete_schedule_pool;
    std::unique_ptr<BackgroundSchedulePool> delete_distributed_schedule_pool;
@ -826,6 +832,7 @@ struct ContextSharedPart : boost::noncopyable
        delete_external_dictionaries_loader = std::move(external_dictionaries_loader);
        delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader);
        delete_user_defined_sql_objects_storage = std::move(user_defined_sql_objects_storage);
        delete_workload_entity_storage = std::move(workload_entity_storage);
        delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool);
        delete_schedule_pool = std::move(schedule_pool);
        delete_distributed_schedule_pool = std::move(distributed_schedule_pool);
@ -844,6 +851,7 @@ struct ContextSharedPart : boost::noncopyable
        delete_external_dictionaries_loader.reset();
        delete_external_user_defined_executable_functions_loader.reset();
        delete_user_defined_sql_objects_storage.reset();
        delete_workload_entity_storage.reset();
        delete_ddl_worker.reset();
        delete_buffer_flush_schedule_pool.reset();
        delete_schedule_pool.reset();
@ -1768,7 +1776,7 @@ std::vector<UUID> Context::getEnabledProfiles() const
ResourceManagerPtr Context::getResourceManager() const
{
    callOnce(shared->resource_manager_initialized, [&] {
        shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "dynamic"));
        shared->resource_manager = createResourceManager(getGlobalContext());
    });

    return shared->resource_manager;
@ -3015,6 +3023,16 @@ void Context::setUserDefinedSQLObjectsStorage(std::unique_ptr<IUserDefinedSQLObj
    shared->user_defined_sql_objects_storage = std::move(storage);
}

IWorkloadEntityStorage & Context::getWorkloadEntityStorage() const
{
    callOnce(shared->workload_entity_storage_initialized, [&] {
        shared->workload_entity_storage = createWorkloadEntityStorage(getGlobalContext());
    });

    std::lock_guard lock(shared->mutex);
    return *shared->workload_entity_storage;
}

#if USE_NLP

SynonymsExtensions & Context::getSynonymsExtensions() const

@ -76,6 +76,7 @@ class EmbeddedDictionaries;
class ExternalDictionariesLoader;
class ExternalUserDefinedExecutableFunctionsLoader;
class IUserDefinedSQLObjectsStorage;
class IWorkloadEntityStorage;
class InterserverCredentials;
using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
class InterserverIOHandler;
@ -893,6 +894,8 @@ public:
    void setUserDefinedSQLObjectsStorage(std::unique_ptr<IUserDefinedSQLObjectsStorage> storage);
    void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config);

    IWorkloadEntityStorage & getWorkloadEntityStorage() const;

#if USE_NLP
    SynonymsExtensions & getSynonymsExtensions() const;
    Lemmatizers & getLemmatizers() const;
68
src/Interpreters/InterpreterCreateResourceQuery.cpp
Normal file
@ -0,0 +1,68 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterCreateResourceQuery.h>

#include <Access/ContextAccess.h>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Parsers/ASTCreateResourceQuery.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int INCORRECT_QUERY;
}

BlockIO InterpreterCreateResourceQuery::execute()
{
    ASTCreateResourceQuery & create_resource_query = query_ptr->as<ASTCreateResourceQuery &>();

    AccessRightsElements access_rights_elements;
    access_rights_elements.emplace_back(AccessType::CREATE_RESOURCE);

    if (create_resource_query.or_replace)
        access_rights_elements.emplace_back(AccessType::DROP_RESOURCE);

    auto current_context = getContext();

    if (!create_resource_query.cluster.empty())
    {
        if (current_context->getWorkloadEntityStorage().isReplicated())
            throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");

        DDLQueryOnClusterParams params;
        params.access_to_check = std::move(access_rights_elements);
        return executeDDLQueryOnCluster(query_ptr, current_context, params);
    }

    current_context->checkAccess(access_rights_elements);

    auto resource_name = create_resource_query.getResourceName();
    bool throw_if_exists = !create_resource_query.if_not_exists && !create_resource_query.or_replace;
    bool replace_if_exists = create_resource_query.or_replace;

    current_context->getWorkloadEntityStorage().storeEntity(
        current_context,
        WorkloadEntityType::Resource,
        resource_name,
        query_ptr,
        throw_if_exists,
        replace_if_exists,
        current_context->getSettingsRef());

    return {};
}

void registerInterpreterCreateResourceQuery(InterpreterFactory & factory)
{
    auto create_fn = [] (const InterpreterFactory::Arguments & args)
    {
        return std::make_unique<InterpreterCreateResourceQuery>(args.query, args.context);
    };
    factory.registerInterpreter("InterpreterCreateResourceQuery", create_fn);
}

}

25
src/Interpreters/InterpreterCreateResourceQuery.h
Normal file
@ -0,0 +1,25 @@
#pragma once

#include <Interpreters/IInterpreter.h>


namespace DB
{

class Context;

class InterpreterCreateResourceQuery : public IInterpreter, WithMutableContext
{
public:
    InterpreterCreateResourceQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
        : WithMutableContext(context_), query_ptr(query_ptr_)
    {
    }

    BlockIO execute() override;

private:
    ASTPtr query_ptr;
};

}
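For orientation, the three creation modes the interpreter derives from `or_replace` and `if_not_exists` above correspond to the following statements. This is an illustrative sketch; the resource and disk names are placeholders borrowed from the parser comments further below:

``` sql
-- throw_if_exists = true: an error if cache_io already exists
CREATE RESOURCE cache_io (READ DISK s3diskWithCache)

-- throw_if_exists = false: a no-op if cache_io already exists
CREATE RESOURCE IF NOT EXISTS cache_io (READ DISK s3diskWithCache)

-- replace_if_exists = true: overwrites the definition (also needs the DROP_RESOURCE right)
CREATE OR REPLACE RESOURCE cache_io (READ DISK s3diskWithCache)
```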
68
src/Interpreters/InterpreterCreateWorkloadQuery.cpp
Normal file
@ -0,0 +1,68 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterCreateWorkloadQuery.h>

#include <Access/ContextAccess.h>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Parsers/ASTCreateWorkloadQuery.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int INCORRECT_QUERY;
}

BlockIO InterpreterCreateWorkloadQuery::execute()
{
    ASTCreateWorkloadQuery & create_workload_query = query_ptr->as<ASTCreateWorkloadQuery &>();

    AccessRightsElements access_rights_elements;
    access_rights_elements.emplace_back(AccessType::CREATE_WORKLOAD);

    if (create_workload_query.or_replace)
        access_rights_elements.emplace_back(AccessType::DROP_WORKLOAD);

    auto current_context = getContext();

    if (!create_workload_query.cluster.empty())
    {
        if (current_context->getWorkloadEntityStorage().isReplicated())
            throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");

        DDLQueryOnClusterParams params;
        params.access_to_check = std::move(access_rights_elements);
        return executeDDLQueryOnCluster(query_ptr, current_context, params);
    }

    current_context->checkAccess(access_rights_elements);

    auto workload_name = create_workload_query.getWorkloadName();
    bool throw_if_exists = !create_workload_query.if_not_exists && !create_workload_query.or_replace;
    bool replace_if_exists = create_workload_query.or_replace;

    current_context->getWorkloadEntityStorage().storeEntity(
        current_context,
        WorkloadEntityType::Workload,
        workload_name,
        query_ptr,
        throw_if_exists,
        replace_if_exists,
        current_context->getSettingsRef());

    return {};
}

void registerInterpreterCreateWorkloadQuery(InterpreterFactory & factory)
{
    auto create_fn = [] (const InterpreterFactory::Arguments & args)
    {
        return std::make_unique<InterpreterCreateWorkloadQuery>(args.query, args.context);
    };
    factory.registerInterpreter("InterpreterCreateWorkloadQuery", create_fn);
}

}

25
src/Interpreters/InterpreterCreateWorkloadQuery.h
Normal file
@ -0,0 +1,25 @@
#pragma once

#include <Interpreters/IInterpreter.h>


namespace DB
{

class Context;

class InterpreterCreateWorkloadQuery : public IInterpreter, WithMutableContext
{
public:
    InterpreterCreateWorkloadQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
        : WithMutableContext(context_), query_ptr(query_ptr_)
    {
    }

    BlockIO execute() override;

private:
    ASTPtr query_ptr;
};

}
60
src/Interpreters/InterpreterDropResourceQuery.cpp
Normal file
@ -0,0 +1,60 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterDropResourceQuery.h>

#include <Access/ContextAccess.h>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Parsers/ASTDropResourceQuery.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int INCORRECT_QUERY;
}

BlockIO InterpreterDropResourceQuery::execute()
{
    ASTDropResourceQuery & drop_resource_query = query_ptr->as<ASTDropResourceQuery &>();

    AccessRightsElements access_rights_elements;
    access_rights_elements.emplace_back(AccessType::DROP_RESOURCE);

    auto current_context = getContext();

    if (!drop_resource_query.cluster.empty())
    {
        if (current_context->getWorkloadEntityStorage().isReplicated())
            throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");

        DDLQueryOnClusterParams params;
        params.access_to_check = std::move(access_rights_elements);
        return executeDDLQueryOnCluster(query_ptr, current_context, params);
    }

    current_context->checkAccess(access_rights_elements);

    bool throw_if_not_exists = !drop_resource_query.if_exists;

    current_context->getWorkloadEntityStorage().removeEntity(
        current_context,
        WorkloadEntityType::Resource,
        drop_resource_query.resource_name,
        throw_if_not_exists);

    return {};
}

void registerInterpreterDropResourceQuery(InterpreterFactory & factory)
{
    auto create_fn = [] (const InterpreterFactory::Arguments & args)
    {
        return std::make_unique<InterpreterDropResourceQuery>(args.query, args.context);
    };
    factory.registerInterpreter("InterpreterDropResourceQuery", create_fn);
}

}

21
src/Interpreters/InterpreterDropResourceQuery.h
Normal file
@ -0,0 +1,21 @@
#pragma once

#include <Interpreters/IInterpreter.h>

namespace DB
{

class Context;

class InterpreterDropResourceQuery : public IInterpreter, WithMutableContext
{
public:
    InterpreterDropResourceQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) {}

    BlockIO execute() override;

private:
    ASTPtr query_ptr;
};

}

60
src/Interpreters/InterpreterDropWorkloadQuery.cpp
Normal file
@ -0,0 +1,60 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterDropWorkloadQuery.h>

#include <Access/ContextAccess.h>
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Parsers/ASTDropWorkloadQuery.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int INCORRECT_QUERY;
}

BlockIO InterpreterDropWorkloadQuery::execute()
{
    ASTDropWorkloadQuery & drop_workload_query = query_ptr->as<ASTDropWorkloadQuery &>();

    AccessRightsElements access_rights_elements;
    access_rights_elements.emplace_back(AccessType::DROP_WORKLOAD);

    auto current_context = getContext();

    if (!drop_workload_query.cluster.empty())
    {
        if (current_context->getWorkloadEntityStorage().isReplicated())
            throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");

        DDLQueryOnClusterParams params;
        params.access_to_check = std::move(access_rights_elements);
        return executeDDLQueryOnCluster(query_ptr, current_context, params);
    }

    current_context->checkAccess(access_rights_elements);

    bool throw_if_not_exists = !drop_workload_query.if_exists;

    current_context->getWorkloadEntityStorage().removeEntity(
        current_context,
        WorkloadEntityType::Workload,
        drop_workload_query.workload_name,
        throw_if_not_exists);

    return {};
}

void registerInterpreterDropWorkloadQuery(InterpreterFactory & factory)
{
    auto create_fn = [] (const InterpreterFactory::Arguments & args)
    {
        return std::make_unique<InterpreterDropWorkloadQuery>(args.query, args.context);
    };
    factory.registerInterpreter("InterpreterDropWorkloadQuery", create_fn);
}

}

21
src/Interpreters/InterpreterDropWorkloadQuery.h
Normal file
@ -0,0 +1,21 @@
#pragma once

#include <Interpreters/IInterpreter.h>

namespace DB
{

class Context;

class InterpreterDropWorkloadQuery : public IInterpreter, WithMutableContext
{
public:
    InterpreterDropWorkloadQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) {}

    BlockIO execute() override;

private:
    ASTPtr query_ptr;
};

}
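The drop interpreters above mirror the create path: `IF EXISTS` simply clears `throw_if_not_exists` before the entity storage is asked to remove the definition. A minimal sketch with placeholder names:

``` sql
-- throw_if_not_exists = true: an error if the workload is unknown
DROP WORKLOAD production

-- throw_if_not_exists = false: succeeds silently when nothing matches
DROP RESOURCE IF EXISTS cache_io
```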
@ -3,9 +3,13 @@
#include <Parsers/ASTCheckQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Parsers/ASTCreateWorkloadQuery.h>
#include <Parsers/ASTCreateResourceQuery.h>
#include <Parsers/ASTCreateIndexQuery.h>
#include <Parsers/ASTDeleteQuery.h>
#include <Parsers/ASTDropFunctionQuery.h>
#include <Parsers/ASTDropWorkloadQuery.h>
#include <Parsers/ASTDropResourceQuery.h>
#include <Parsers/ASTDropIndexQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTUndropQuery.h>
@ -332,6 +336,22 @@ InterpreterFactory::InterpreterPtr InterpreterFactory::get(ASTPtr & query, Conte
    {
        interpreter_name = "InterpreterDropFunctionQuery";
    }
    else if (query->as<ASTCreateWorkloadQuery>())
    {
        interpreter_name = "InterpreterCreateWorkloadQuery";
    }
    else if (query->as<ASTDropWorkloadQuery>())
    {
        interpreter_name = "InterpreterDropWorkloadQuery";
    }
    else if (query->as<ASTCreateResourceQuery>())
    {
        interpreter_name = "InterpreterCreateResourceQuery";
    }
    else if (query->as<ASTDropResourceQuery>())
    {
        interpreter_name = "InterpreterDropResourceQuery";
    }
    else if (query->as<ASTCreateIndexQuery>())
    {
        interpreter_name = "InterpreterCreateIndexQuery";
@ -51,9 +51,9 @@ using NamedSessionKey = std::pair<UUID, String>;
struct NamedSessionData
{
    NamedSessionKey key;
    UInt64 close_cycle = 0;
    ContextMutablePtr context;
    std::chrono::steady_clock::duration timeout;
    std::chrono::steady_clock::time_point close_time_bucket{};
    NamedSessionsStorage & parent;

    NamedSessionData(NamedSessionKey key_, ContextPtr context_, std::chrono::steady_clock::duration timeout_, NamedSessionsStorage & parent_)
@ -137,6 +137,18 @@ public:

        if (!isSharedPtrUnique(session))
            throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id);

        if (session->close_time_bucket != std::chrono::steady_clock::time_point{})
        {
            auto bucket_it = close_time_buckets.find(session->close_time_bucket);
            auto & bucket_sessions = bucket_it->second;
            bucket_sessions.erase(key);
            if (bucket_sessions.empty())
                close_time_buckets.erase(bucket_it);

            session->close_time_bucket = std::chrono::steady_clock::time_point{};
        }

        return {session, false};
    }

@ -179,33 +191,31 @@ private:
        }
    };

    /// TODO it's very complicated. Make simple std::map with time_t or boost::multi_index.
    using Container = std::unordered_map<Key, std::shared_ptr<NamedSessionData>, SessionKeyHash>;
    using CloseTimes = std::deque<std::vector<Key>>;
    Container sessions;
    CloseTimes close_times;
    std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1);
    std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now();
    UInt64 close_cycle = 0;

    // Ordered map of close times for sessions, grouped by the next multiple of close_interval
    using CloseTimes = std::map<std::chrono::steady_clock::time_point, std::unordered_set<Key, SessionKeyHash>>;
    CloseTimes close_time_buckets;

    constexpr static std::chrono::steady_clock::duration close_interval = std::chrono::milliseconds(1000);
    constexpr static std::chrono::nanoseconds::rep close_interval_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(close_interval).count();

    void scheduleCloseSession(NamedSessionData & session, std::unique_lock<std::mutex> &)
    {
        /// Push it on a queue of sessions to close, on a position corresponding to the timeout.
        /// (timeout is measured from current moment of time)
        chassert(session.close_time_bucket == std::chrono::steady_clock::time_point{});

        const UInt64 close_index = session.timeout / close_interval + 1;
        const auto new_close_cycle = close_cycle + close_index;
        const auto session_close_time = std::chrono::steady_clock::now() + session.timeout;
        const auto session_close_time_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(session_close_time.time_since_epoch()).count();
        const auto bucket_padding = close_interval - std::chrono::nanoseconds(session_close_time_ns % close_interval_ns);
        const auto close_time_bucket = session_close_time + bucket_padding;

        if (session.close_cycle != new_close_cycle)
        {
            session.close_cycle = new_close_cycle;
            if (close_times.size() < close_index + 1)
                close_times.resize(close_index + 1);
            close_times[close_index].emplace_back(session.key);
        }
        session.close_time_bucket = close_time_bucket;
        auto & bucket_sessions = close_time_buckets[close_time_bucket];
        bucket_sessions.insert(session.key);

        LOG_TEST(log, "Schedule closing session with session_id: {}, user_id: {}",
            session.key.second, session.key.first);
    }

    void cleanThread()
@ -214,55 +224,46 @@ private:
        std::unique_lock lock{mutex};
        while (!quit)
        {
            auto interval = closeSessions(lock);
            if (cond.wait_for(lock, interval, [this]() -> bool { return quit; }))
            closeSessions(lock);
            if (cond.wait_for(lock, close_interval, [this]() -> bool { return quit; }))
                break;
        }
    }

    /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added.
    std::chrono::steady_clock::duration closeSessions(std::unique_lock<std::mutex> & lock)
    void closeSessions(std::unique_lock<std::mutex> & lock)
    {
        const auto now = std::chrono::steady_clock::now();

        /// The time to close the next session did not come
        if (now < close_cycle_time)
            return close_cycle_time - now; /// Will sleep until it comes.

        const auto current_cycle = close_cycle;

        ++close_cycle;
        close_cycle_time = now + close_interval;

        if (close_times.empty())
            return close_interval;

        auto & sessions_to_close = close_times.front();

        for (const auto & key : sessions_to_close)
        for (auto bucket_it = close_time_buckets.begin(); bucket_it != close_time_buckets.end(); bucket_it = close_time_buckets.erase(bucket_it))
        {
            const auto session = sessions.find(key);
            const auto & [time_bucket, session_keys] = *bucket_it;
            if (time_bucket > now)
                break;

            if (session != sessions.end() && session->second->close_cycle <= current_cycle)
            for (const auto & key : session_keys)
            {
                if (session->second.use_count() != 1)
                {
                    LOG_TEST(log, "Delay closing session with session_id: {}, user_id: {}", key.second, key.first);
                const auto & session_it = sessions.find(key);

                    /// Skip but move it to close on the next cycle.
                    session->second->timeout = std::chrono::steady_clock::duration{0};
                    scheduleCloseSession(*session->second, lock);
                }
                else
                if (session_it == sessions.end())
                    continue;

                const auto & session = session_it->second;

                if (session.use_count() != 1)
                {
                    LOG_TRACE(log, "Close session with session_id: {}, user_id: {}", key.second, key.first);
                    sessions.erase(session);
                    LOG_TEST(log, "Delay closing session with session_id: {}, user_id: {}, refcount: {}",
                        key.second, key.first, session.use_count());

                    session->timeout = std::chrono::steady_clock::duration{0};
                    scheduleCloseSession(*session, lock);
                    continue;
                }

                LOG_TRACE(log, "Close session with session_id: {}, user_id: {}", key.second, key.first);

                sessions.erase(session_it);
            }
        }

        close_times.pop_front();
        return close_interval;
    }

    std::mutex mutex;
@ -52,6 +52,10 @@ void registerInterpreterExternalDDLQuery(InterpreterFactory & factory);
void registerInterpreterTransactionControlQuery(InterpreterFactory & factory);
void registerInterpreterCreateFunctionQuery(InterpreterFactory & factory);
void registerInterpreterDropFunctionQuery(InterpreterFactory & factory);
void registerInterpreterCreateWorkloadQuery(InterpreterFactory & factory);
void registerInterpreterDropWorkloadQuery(InterpreterFactory & factory);
void registerInterpreterCreateResourceQuery(InterpreterFactory & factory);
void registerInterpreterDropResourceQuery(InterpreterFactory & factory);
void registerInterpreterCreateIndexQuery(InterpreterFactory & factory);
void registerInterpreterCreateNamedCollectionQuery(InterpreterFactory & factory);
void registerInterpreterDropIndexQuery(InterpreterFactory & factory);
@ -111,6 +115,10 @@ void registerInterpreters()
    registerInterpreterTransactionControlQuery(factory);
    registerInterpreterCreateFunctionQuery(factory);
    registerInterpreterDropFunctionQuery(factory);
    registerInterpreterCreateWorkloadQuery(factory);
    registerInterpreterDropWorkloadQuery(factory);
    registerInterpreterCreateResourceQuery(factory);
    registerInterpreterDropResourceQuery(factory);
    registerInterpreterCreateIndexQuery(factory);
    registerInterpreterCreateNamedCollectionQuery(factory);
    registerInterpreterDropIndexQuery(factory);
83
src/Parsers/ASTCreateResourceQuery.cpp
Normal file
@ -0,0 +1,83 @@
#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <Parsers/ASTCreateResourceQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>

namespace DB
{

ASTPtr ASTCreateResourceQuery::clone() const
{
    auto res = std::make_shared<ASTCreateResourceQuery>(*this);
    res->children.clear();

    res->resource_name = resource_name->clone();
    res->children.push_back(res->resource_name);

    res->operations = operations;

    return res;
}

void ASTCreateResourceQuery::formatImpl(const IAST::FormatSettings & format, IAST::FormatState &, IAST::FormatStateStacked) const
{
    format.ostr << (format.hilite ? hilite_keyword : "") << "CREATE ";

    if (or_replace)
        format.ostr << "OR REPLACE ";

    format.ostr << "RESOURCE ";

    if (if_not_exists)
        format.ostr << "IF NOT EXISTS ";

    format.ostr << (format.hilite ? hilite_none : "");

    format.ostr << (format.hilite ? hilite_identifier : "") << backQuoteIfNeed(getResourceName()) << (format.hilite ? hilite_none : "");

    formatOnCluster(format);

    format.ostr << " (";

    bool first = true;
    for (const auto & operation : operations)
    {
        if (!first)
            format.ostr << ", ";
        else
            first = false;

        switch (operation.mode)
        {
            case AccessMode::Read:
            {
                format.ostr << (format.hilite ? hilite_keyword : "") << "READ ";
                break;
            }
            case AccessMode::Write:
            {
                format.ostr << (format.hilite ? hilite_keyword : "") << "WRITE ";
                break;
            }
        }
        if (operation.disk)
        {
            format.ostr << "DISK " << (format.hilite ? hilite_none : "");
            format.ostr << (format.hilite ? hilite_identifier : "") << backQuoteIfNeed(*operation.disk) << (format.hilite ? hilite_none : "");
        }
        else
            format.ostr << "ANY DISK" << (format.hilite ? hilite_none : "");
    }

    format.ostr << ")";
}

String ASTCreateResourceQuery::getResourceName() const
{
    String name;
    tryGetIdentifierNameInto(resource_name, name);
    return name;
}

}

48
src/Parsers/ASTCreateResourceQuery.h
Normal file
@ -0,0 +1,48 @@
#pragma once

#include <Parsers/IAST.h>
#include <Parsers/ASTQueryWithOnCluster.h>


namespace DB
{

class ASTCreateResourceQuery : public IAST, public ASTQueryWithOnCluster
{
public:
    enum class AccessMode
    {
        Read,
        Write
    };
    struct Operation
    {
        AccessMode mode;
        std::optional<String> disk; // Applies to all disks if not set

        friend bool operator ==(const Operation & lhs, const Operation & rhs) { return lhs.mode == rhs.mode && lhs.disk == rhs.disk; }
        friend bool operator !=(const Operation & lhs, const Operation & rhs) { return !(lhs == rhs); }
    };

    using Operations = std::vector<Operation>;

    ASTPtr resource_name;
    Operations operations; /// List of operations that require this resource

    bool or_replace = false;
    bool if_not_exists = false;

    String getID(char delim) const override { return "CreateResourceQuery" + (delim + getResourceName()); }

    ASTPtr clone() const override;

    void formatImpl(const FormatSettings & format, FormatState & state, FormatStateStacked frame) const override;

    ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTCreateResourceQuery>(clone()); }

    String getResourceName() const;

    QueryKind getQueryKind() const override { return QueryKind::Create; }
};

}
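As a sanity check on `formatImpl` above: an AST holding a write operation bound to a named disk plus a read operation with no disk would be rendered as the statement below (identifiers are illustrative placeholders):

``` sql
CREATE RESOURCE cache_io (WRITE DISK s3diskWithCache, READ ANY DISK)
```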
95
src/Parsers/ASTCreateWorkloadQuery.cpp
Normal file
@ -0,0 +1,95 @@
#include <Common/quoteString.h>
#include <Common/FieldVisitorToString.h>
#include <IO/Operators.h>
#include <Parsers/ASTCreateWorkloadQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>

namespace DB
{

ASTPtr ASTCreateWorkloadQuery::clone() const
{
    auto res = std::make_shared<ASTCreateWorkloadQuery>(*this);
    res->children.clear();

    res->workload_name = workload_name->clone();
    res->children.push_back(res->workload_name);

    if (workload_parent)
    {
        res->workload_parent = workload_parent->clone();
        res->children.push_back(res->workload_parent);
    }

    res->changes = changes;

    return res;
}

void ASTCreateWorkloadQuery::formatImpl(const IAST::FormatSettings & format, IAST::FormatState &, IAST::FormatStateStacked) const
{
    format.ostr << (format.hilite ? hilite_keyword : "") << "CREATE ";

    if (or_replace)
        format.ostr << "OR REPLACE ";

    format.ostr << "WORKLOAD ";

    if (if_not_exists)
        format.ostr << "IF NOT EXISTS ";

    format.ostr << (format.hilite ? hilite_none : "");

    format.ostr << (format.hilite ? hilite_identifier : "") << backQuoteIfNeed(getWorkloadName()) << (format.hilite ? hilite_none : "");

    formatOnCluster(format);

    if (hasParent())
    {
        format.ostr << (format.hilite ? hilite_keyword : "") << " IN " << (format.hilite ? hilite_none : "");
        format.ostr << (format.hilite ? hilite_identifier : "") << backQuoteIfNeed(getWorkloadParent()) << (format.hilite ? hilite_none : "");
    }

    if (!changes.empty())
    {
        format.ostr << ' ' << (format.hilite ? hilite_keyword : "") << "SETTINGS" << (format.hilite ? hilite_none : "") << ' ';

        bool first = true;

        for (const auto & change : changes)
        {
            if (!first)
                format.ostr << ", ";
            else
                first = false;
            format.ostr << change.name << " = " << applyVisitor(FieldVisitorToString(), change.value);
            if (!change.resource.empty())
            {
                format.ostr << ' ' << (format.hilite ? hilite_keyword : "") << "FOR" << (format.hilite ? hilite_none : "") << ' ';
                format.ostr << (format.hilite ? hilite_identifier : "") << backQuoteIfNeed(change.resource) << (format.hilite ? hilite_none : "");
            }
        }
    }
}

String ASTCreateWorkloadQuery::getWorkloadName() const
{
    String name;
    tryGetIdentifierNameInto(workload_name, name);
    return name;
}

bool ASTCreateWorkloadQuery::hasParent() const
{
    return workload_parent != nullptr;
}

String ASTCreateWorkloadQuery::getWorkloadParent() const
{
    String name;
    tryGetIdentifierNameInto(workload_parent, name);
    return name;
}

}

53
src/Parsers/ASTCreateWorkloadQuery.h
Normal file
@ -0,0 +1,53 @@
#pragma once

#include <string_view>
#include <Parsers/IAST.h>
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Common/SettingsChanges.h>

namespace DB
{

class ASTCreateWorkloadQuery : public IAST, public ASTQueryWithOnCluster
{
public:
    ASTPtr workload_name;
    ASTPtr workload_parent;

    /// Special version of settings that support optional `FOR resource` clause
    struct SettingChange
    {
        String name;
        Field value;
        String resource;

        SettingChange() = default;
        SettingChange(std::string_view name_, const Field & value_, std::string_view resource_) : name(name_), value(value_), resource(resource_) {}
        SettingChange(std::string_view name_, Field && value_, std::string_view resource_) : name(name_), value(std::move(value_)), resource(resource_) {}

        friend bool operator ==(const SettingChange & lhs, const SettingChange & rhs) { return (lhs.name == rhs.name) && (lhs.value == rhs.value) && (lhs.resource == rhs.resource); }
        friend bool operator !=(const SettingChange & lhs, const SettingChange & rhs) { return !(lhs == rhs); }
    };

    using SettingsChanges = std::vector<SettingChange>;
    SettingsChanges changes;

    bool or_replace = false;
    bool if_not_exists = false;

    String getID(char delim) const override { return "CreateWorkloadQuery" + (delim + getWorkloadName()); }

    ASTPtr clone() const override;

    void formatImpl(const FormatSettings & format, FormatState & state, FormatStateStacked frame) const override;

    ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTCreateWorkloadQuery>(clone()); }

    String getWorkloadName() const;
    bool hasParent() const;
    String getWorkloadParent() const;

    QueryKind getQueryKind() const override { return QueryKind::Create; }
};

}
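Combining the optional `IN` parent with the per-setting `FOR` resource clause handled above, a formatted workload definition looks like the following (this mirrors the example in the parser header further below; all names are illustrative):

``` sql
CREATE WORKLOAD production IN all SETTINGS weight = 3, max_speed = '1G' FOR network_read, max_speed = '2G' FOR network_write
```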
25
src/Parsers/ASTDropResourceQuery.cpp
Normal file
@ -0,0 +1,25 @@
#include <Parsers/ASTDropResourceQuery.h>
#include <Common/quoteString.h>
#include <IO/Operators.h>

namespace DB
{

ASTPtr ASTDropResourceQuery::clone() const
{
    return std::make_shared<ASTDropResourceQuery>(*this);
}

void ASTDropResourceQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
{
    settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP RESOURCE ";

    if (if_exists)
        settings.ostr << "IF EXISTS ";

    settings.ostr << (settings.hilite ? hilite_none : "");
    settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(resource_name) << (settings.hilite ? hilite_none : "");
    formatOnCluster(settings);
}

}

28
src/Parsers/ASTDropResourceQuery.h
Normal file
@ -0,0 +1,28 @@
#pragma once

#include <Parsers/IAST.h>
#include <Parsers/ASTQueryWithOnCluster.h>


namespace DB
{

class ASTDropResourceQuery : public IAST, public ASTQueryWithOnCluster
{
public:
    String resource_name;

    bool if_exists = false;

    String getID(char) const override { return "DropResourceQuery"; }

    ASTPtr clone() const override;

    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;

    ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTDropResourceQuery>(clone()); }

    QueryKind getQueryKind() const override { return QueryKind::Drop; }
};

}

25
src/Parsers/ASTDropWorkloadQuery.cpp
Normal file
@ -0,0 +1,25 @@
#include <Parsers/ASTDropWorkloadQuery.h>
#include <Common/quoteString.h>
#include <IO/Operators.h>

namespace DB
{

ASTPtr ASTDropWorkloadQuery::clone() const
{
    return std::make_shared<ASTDropWorkloadQuery>(*this);
}

void ASTDropWorkloadQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
{
    settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP WORKLOAD ";

    if (if_exists)
        settings.ostr << "IF EXISTS ";

    settings.ostr << (settings.hilite ? hilite_none : "");
    settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(workload_name) << (settings.hilite ? hilite_none : "");
    formatOnCluster(settings);
}

}

28
src/Parsers/ASTDropWorkloadQuery.h
Normal file
@ -0,0 +1,28 @@
#pragma once

#include <Parsers/IAST.h>
#include <Parsers/ASTQueryWithOnCluster.h>


namespace DB
{

class ASTDropWorkloadQuery : public IAST, public ASTQueryWithOnCluster
{
public:
    String workload_name;

    bool if_exists = false;

    String getID(char) const override { return "DropWorkloadQuery"; }

    ASTPtr clone() const override;

    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;

    ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTDropWorkloadQuery>(clone()); }

    QueryKind getQueryKind() const override { return QueryKind::Drop; }
};

}
@ -392,6 +392,7 @@ namespace DB
    MR_MACROS(RANDOMIZE_FOR, "RANDOMIZE FOR") \
    MR_MACROS(RANDOMIZED, "RANDOMIZED") \
    MR_MACROS(RANGE, "RANGE") \
    MR_MACROS(READ, "READ") \
    MR_MACROS(READONLY, "READONLY") \
    MR_MACROS(REALM, "REALM") \
    MR_MACROS(RECOMPRESS, "RECOMPRESS") \
@ -411,6 +412,7 @@ namespace DB
    MR_MACROS(REPLACE, "REPLACE") \
    MR_MACROS(RESET_SETTING, "RESET SETTING") \
    MR_MACROS(RESET_AUTHENTICATION_METHODS_TO_NEW, "RESET AUTHENTICATION METHODS TO NEW") \
    MR_MACROS(RESOURCE, "RESOURCE") \
    MR_MACROS(RESPECT_NULLS, "RESPECT NULLS") \
    MR_MACROS(RESTORE, "RESTORE") \
    MR_MACROS(RESTRICT, "RESTRICT") \
@ -523,6 +525,7 @@ namespace DB
    MR_MACROS(WHEN, "WHEN") \
    MR_MACROS(WHERE, "WHERE") \
    MR_MACROS(WINDOW, "WINDOW") \
    MR_MACROS(WORKLOAD, "WORKLOAD") \
    MR_MACROS(QUALIFY, "QUALIFY") \
    MR_MACROS(WITH_ADMIN_OPTION, "WITH ADMIN OPTION") \
    MR_MACROS(WITH_CHECK, "WITH CHECK") \
@ -535,6 +538,7 @@ namespace DB
    MR_MACROS(WITH, "WITH") \
    MR_MACROS(RECURSIVE, "RECURSIVE") \
    MR_MACROS(WK, "WK") \
    MR_MACROS(WRITE, "WRITE") \
    MR_MACROS(WRITABLE, "WRITABLE") \
    MR_MACROS(WW, "WW") \
    MR_MACROS(YEAR, "YEAR") \
144
src/Parsers/ParserCreateResourceQuery.cpp
Normal file
@ -0,0 +1,144 @@
#include <Parsers/ParserCreateResourceQuery.h>

#include <Parsers/ASTCreateResourceQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ExpressionListParsers.h>


namespace DB
{

namespace
{

bool parseOneOperation(ASTCreateResourceQuery::Operation & operation, IParser::Pos & pos, Expected & expected)
{
    ParserIdentifier disk_name_p;

    ASTCreateResourceQuery::AccessMode mode;
    ASTPtr node;
    std::optional<String> disk;

    if (ParserKeyword(Keyword::WRITE).ignore(pos, expected))
        mode = ASTCreateResourceQuery::AccessMode::Write;
    else if (ParserKeyword(Keyword::READ).ignore(pos, expected))
        mode = ASTCreateResourceQuery::AccessMode::Read;
    else
        return false;

    if (ParserKeyword(Keyword::ANY).ignore(pos, expected))
    {
        if (!ParserKeyword(Keyword::DISK).ignore(pos, expected))
            return false;
    }
    else
    {
        if (!ParserKeyword(Keyword::DISK).ignore(pos, expected))
            return false;

        if (!disk_name_p.parse(pos, node, expected))
            return false;

        disk.emplace();
        if (!tryGetIdentifierNameInto(node, *disk))
            return false;
    }

    operation.mode = mode;
    operation.disk = std::move(disk);

    return true;
}

bool parseOperations(IParser::Pos & pos, Expected & expected, ASTCreateResourceQuery::Operations & operations)
{
    return IParserBase::wrapParseImpl(pos, [&]
    {
        ParserToken s_open(TokenType::OpeningRoundBracket);
        ParserToken s_close(TokenType::ClosingRoundBracket);

        if (!s_open.ignore(pos, expected))
            return false;

        ASTCreateResourceQuery::Operations res_operations;

        auto parse_operation = [&]
        {
            ASTCreateResourceQuery::Operation operation;
            if (!parseOneOperation(operation, pos, expected))
                return false;
            res_operations.push_back(std::move(operation));
            return true;
        };

        if (!ParserList::parseUtil(pos, expected, parse_operation, false))
            return false;

        if (!s_close.ignore(pos, expected))
            return false;

        operations = std::move(res_operations);
        return true;
    });
}

}

bool ParserCreateResourceQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_create(Keyword::CREATE);
    ParserKeyword s_resource(Keyword::RESOURCE);
    ParserKeyword s_or_replace(Keyword::OR_REPLACE);
    ParserKeyword s_if_not_exists(Keyword::IF_NOT_EXISTS);
    ParserKeyword s_on(Keyword::ON);
    ParserIdentifier resource_name_p;

    ASTPtr resource_name;

    String cluster_str;
    bool or_replace = false;
    bool if_not_exists = false;

    if (!s_create.ignore(pos, expected))
        return false;

    if (s_or_replace.ignore(pos, expected))
        or_replace = true;

    if (!s_resource.ignore(pos, expected))
        return false;

    if (!or_replace && s_if_not_exists.ignore(pos, expected))
        if_not_exists = true;

    if (!resource_name_p.parse(pos, resource_name, expected))
        return false;

    if (s_on.ignore(pos, expected))
    {
        if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
            return false;
    }

    ASTCreateResourceQuery::Operations operations;
    if (!parseOperations(pos, expected, operations))
        return false;

    auto create_resource_query = std::make_shared<ASTCreateResourceQuery>();
    node = create_resource_query;

    create_resource_query->resource_name = resource_name;
    create_resource_query->children.push_back(resource_name);

    create_resource_query->or_replace = or_replace;
    create_resource_query->if_not_exists = if_not_exists;
    create_resource_query->cluster = std::move(cluster_str);

    create_resource_query->operations = std::move(operations);

    return true;
}

}

16
src/Parsers/ParserCreateResourceQuery.h
Normal file
@ -0,0 +1,16 @@
#pragma once

#include "IParserBase.h"

namespace DB
{

/// CREATE RESOURCE cache_io (WRITE DISK s3diskWithCache, READ DISK s3diskWithCache)
class ParserCreateResourceQuery : public IParserBase
{
protected:
    const char * getName() const override { return "CREATE RESOURCE query"; }
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};

}
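Putting the pieces of the parser together, the accepted form is sketched below; bracketed parts are optional, and since `IF NOT EXISTS` is only tried when `OR REPLACE` was absent, the two cannot be combined. The cluster name is a hypothetical placeholder:

``` sql
CREATE [OR REPLACE] RESOURCE [IF NOT EXISTS] cache_io [ON CLUSTER cluster_name]
    (READ ANY DISK, WRITE DISK s3diskWithCache)
```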
16
src/Parsers/ParserCreateWorkloadEntity.cpp
Normal file
@ -0,0 +1,16 @@
#include <Parsers/ParserCreateWorkloadEntity.h>
#include <Parsers/ParserCreateWorkloadQuery.h>
#include <Parsers/ParserCreateResourceQuery.h>

namespace DB
{

bool ParserCreateWorkloadEntity::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserCreateWorkloadQuery create_workload_p;
    ParserCreateResourceQuery create_resource_p;

    return create_workload_p.parse(pos, node, expected) || create_resource_p.parse(pos, node, expected);
}

}

17
src/Parsers/ParserCreateWorkloadEntity.h
Normal file
@ -0,0 +1,17 @@
#pragma once

#include <Parsers/IParserBase.h>

namespace DB
{

/// Special parser for the CREATE WORKLOAD and CREATE RESOURCE queries.
class ParserCreateWorkloadEntity : public IParserBase
{
protected:
    const char * getName() const override { return "CREATE workload entity query"; }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};

}
155
src/Parsers/ParserCreateWorkloadQuery.cpp
Normal file
@ -0,0 +1,155 @@
#include <Parsers/ParserCreateWorkloadQuery.h>

#include <Parsers/ASTCreateWorkloadQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ParserSetQuery.h>

#include <Common/SettingsChanges.h>

namespace DB
{

namespace
{

bool parseWorkloadSetting(
    ASTCreateWorkloadQuery::SettingChange & change, IParser::Pos & pos, Expected & expected)
{
    ParserIdentifier name_p;
    ParserLiteral value_p;
    ParserToken s_eq(TokenType::Equals);
    ParserIdentifier resource_name_p;

    ASTPtr name_node;
    ASTPtr value_node;
    ASTPtr resource_name_node;

    String name;
    String resource_name;

    if (!name_p.parse(pos, name_node, expected))
        return false;
    tryGetIdentifierNameInto(name_node, name);

    if (!s_eq.ignore(pos, expected))
        return false;

    if (!value_p.parse(pos, value_node, expected))
        return false;

    if (ParserKeyword(Keyword::FOR).ignore(pos, expected))
    {
        if (!resource_name_p.parse(pos, resource_name_node, expected))
            return false;
        tryGetIdentifierNameInto(resource_name_node, resource_name);
    }

    change.name = std::move(name);
    change.value = value_node->as<ASTLiteral &>().value;
    change.resource = std::move(resource_name);

    return true;
}

bool parseSettings(IParser::Pos & pos, Expected & expected, ASTCreateWorkloadQuery::SettingsChanges & changes)
{
    return IParserBase::wrapParseImpl(pos, [&]
    {
        if (!ParserKeyword(Keyword::SETTINGS).ignore(pos, expected))
            return false;

        ASTCreateWorkloadQuery::SettingsChanges res_changes;

        auto parse_setting = [&]
        {
            ASTCreateWorkloadQuery::SettingChange change;
            if (!parseWorkloadSetting(change, pos, expected))
                return false;
            res_changes.push_back(std::move(change));
            return true;
        };

        if (!ParserList::parseUtil(pos, expected, parse_setting, false))
            return false;

        changes = std::move(res_changes);
        return true;
    });
}

}

bool ParserCreateWorkloadQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_create(Keyword::CREATE);
    ParserKeyword s_workload(Keyword::WORKLOAD);
    ParserKeyword s_or_replace(Keyword::OR_REPLACE);
    ParserKeyword s_if_not_exists(Keyword::IF_NOT_EXISTS);
    ParserIdentifier workload_name_p;
    ParserKeyword s_on(Keyword::ON);
    ParserKeyword s_in(Keyword::IN);

    ASTPtr workload_name;
    ASTPtr workload_parent;

    String cluster_str;
    bool or_replace = false;
    bool if_not_exists = false;

    if (!s_create.ignore(pos, expected))
        return false;

    if (s_or_replace.ignore(pos, expected))
        or_replace = true;

    if (!s_workload.ignore(pos, expected))
        return false;

    if (!or_replace && s_if_not_exists.ignore(pos, expected))
        if_not_exists = true;

    if (!workload_name_p.parse(pos, workload_name, expected))
        return false;

    if (s_on.ignore(pos, expected))
    {
        if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
            return false;
    }

    if (s_in.ignore(pos, expected))
    {
        if (!workload_name_p.parse(pos, workload_parent, expected))
            return false;
    }

    ASTCreateWorkloadQuery::SettingsChanges changes;
    parseSettings(pos, expected, changes);

    auto create_workload_query = std::make_shared<ASTCreateWorkloadQuery>();
    node = create_workload_query;

    create_workload_query->workload_name = workload_name;
    create_workload_query->children.push_back(workload_name);

    if (workload_parent)
    {
        create_workload_query->workload_parent = workload_parent;
        create_workload_query->children.push_back(workload_parent);
    }

    create_workload_query->or_replace = or_replace;
    create_workload_query->if_not_exists = if_not_exists;
    create_workload_query->cluster = std::move(cluster_str);
    create_workload_query->changes = std::move(changes);

    return true;
}

}

16
src/Parsers/ParserCreateWorkloadQuery.h
Normal file
@ -0,0 +1,16 @@
#pragma once

#include "IParserBase.h"

namespace DB
{

/// CREATE WORKLOAD production IN all SETTINGS weight = 3, max_speed = '1G' FOR network_read, max_speed = '2G' FOR network_write
class ParserCreateWorkloadQuery : public IParserBase
{
protected:
    const char * getName() const override { return "CREATE WORKLOAD query"; }
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};

}
52
src/Parsers/ParserDropResourceQuery.cpp
Normal file
@ -0,0 +1,52 @@
#include <Parsers/ASTDropResourceQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ParserDropResourceQuery.h>

namespace DB
{

bool ParserDropResourceQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_drop(Keyword::DROP);
    ParserKeyword s_resource(Keyword::RESOURCE);
    ParserKeyword s_if_exists(Keyword::IF_EXISTS);
    ParserKeyword s_on(Keyword::ON);
    ParserIdentifier resource_name_p;

    String cluster_str;
    bool if_exists = false;

    ASTPtr resource_name;

    if (!s_drop.ignore(pos, expected))
        return false;

    if (!s_resource.ignore(pos, expected))
        return false;

    if (s_if_exists.ignore(pos, expected))
        if_exists = true;

    if (!resource_name_p.parse(pos, resource_name, expected))
        return false;

    if (s_on.ignore(pos, expected))
    {
        if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
            return false;
    }

    auto drop_resource_query = std::make_shared<ASTDropResourceQuery>();
    drop_resource_query->if_exists = if_exists;
    drop_resource_query->cluster = std::move(cluster_str);

    node = drop_resource_query;

    drop_resource_query->resource_name = resource_name->as<ASTIdentifier &>().name();

    return true;
}

}
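The grammar here is simply `DROP RESOURCE [IF EXISTS] <name> [ON CLUSTER <cluster>]`. A minimal sketch with both optional clauses present, under the same `parseQuery` assumption as the earlier examples:

``` cpp
#include <Parsers/ParserDropResourceQuery.h>
#include <Parsers/ASTDropResourceQuery.h>
#include <Parsers/parseQuery.h>
#include <cassert>

using namespace DB;

/// Sketch: DROP RESOURCE with both optional clauses present.
void dropResourceSketch()
{
    String q = "DROP RESOURCE IF EXISTS io_read ON CLUSTER default";
    ParserDropResourceQuery parser;
    ASTPtr ast = parseQuery(parser, q.data(), q.data() + q.size(), "", 0, 0);
    const auto & drop = ast->as<ASTDropResourceQuery &>();
    assert(drop.if_exists);
    assert(drop.resource_name == "io_read");
    assert(drop.cluster == "default");
}
```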
14
src/Parsers/ParserDropResourceQuery.h
Normal file
@ -0,0 +1,14 @@
#pragma once

#include "IParserBase.h"

namespace DB
{
/// DROP RESOURCE resource1
class ParserDropResourceQuery : public IParserBase
{
protected:
    const char * getName() const override { return "DROP RESOURCE query"; }
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}
52
src/Parsers/ParserDropWorkloadQuery.cpp
Normal file
@ -0,0 +1,52 @@
#include <Parsers/ASTDropWorkloadQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ParserDropWorkloadQuery.h>

namespace DB
{

bool ParserDropWorkloadQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_drop(Keyword::DROP);
    ParserKeyword s_workload(Keyword::WORKLOAD);
    ParserKeyword s_if_exists(Keyword::IF_EXISTS);
    ParserKeyword s_on(Keyword::ON);
    ParserIdentifier workload_name_p;

    String cluster_str;
    bool if_exists = false;

    ASTPtr workload_name;

    if (!s_drop.ignore(pos, expected))
        return false;

    if (!s_workload.ignore(pos, expected))
        return false;

    if (s_if_exists.ignore(pos, expected))
        if_exists = true;

    if (!workload_name_p.parse(pos, workload_name, expected))
        return false;

    if (s_on.ignore(pos, expected))
    {
        if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
            return false;
    }

    auto drop_workload_query = std::make_shared<ASTDropWorkloadQuery>();
    drop_workload_query->if_exists = if_exists;
    drop_workload_query->cluster = std::move(cluster_str);

    node = drop_workload_query;

    drop_workload_query->workload_name = workload_name->as<ASTIdentifier &>().name();

    return true;
}

}
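This parser mirrors `ParserDropResourceQuery` line for line, with `WORKLOAD` in place of `RESOURCE`: `DROP WORKLOAD [IF EXISTS] <name> [ON CLUSTER <cluster>]`. A one-line sketch under the same assumptions:

``` cpp
#include <Parsers/ParserDropWorkloadQuery.h>
#include <Parsers/parseQuery.h>

using namespace DB;

/// Sketch: the minimal accepted form with the optional IF EXISTS modifier.
void dropWorkloadSketch()
{
    String q = "DROP WORKLOAD IF EXISTS production";
    ParserDropWorkloadQuery parser;
    parseQuery(parser, q.data(), q.data() + q.size(), "", 0, 0);  /// throws on a parse error
}
```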
14
src/Parsers/ParserDropWorkloadQuery.h
Normal file
@ -0,0 +1,14 @@
#pragma once

#include "IParserBase.h"

namespace DB
{
/// DROP WORKLOAD workload1
class ParserDropWorkloadQuery : public IParserBase
{
protected:
    const char * getName() const override { return "DROP WORKLOAD query"; }
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}
Some files were not shown because too many files have changed in this diff.