From 64d130f8a2b8614d78177c5a9381489a915814b5 Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 14 Sep 2020 15:39:33 -0400 Subject: [PATCH 001/697] Adding support for `[PERIODIC] REFRESH [value_sec]` clause when creating LIVE VIEW tables. --- src/Core/Defines.h | 1 + src/Core/Settings.h | 1 + src/Parsers/ASTCreateQuery.cpp | 21 ++- src/Parsers/ASTCreateQuery.h | 2 + src/Parsers/ParserCreateQuery.cpp | 38 ++++- .../LiveView/LiveViewBlockOutputStream.h | 10 ++ src/Storages/LiveView/StorageLiveView.cpp | 133 +++++++++++++----- src/Storages/LiveView/StorageLiveView.h | 36 ++++- 8 files changed, 196 insertions(+), 46 deletions(-) diff --git a/src/Core/Defines.h b/src/Core/Defines.h index e244581c339..8920d44fdb4 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -36,6 +36,7 @@ #define DEFAULT_MERGE_BLOCK_SIZE 8192 #define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5 +#define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC 15 #define DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE 1024 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b39c223a5e9..f7ecab5fecb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -369,6 +369,7 @@ class IColumn; M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ + M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \ M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ M(Bool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \ diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 73903e28f84..9b6c62b026f 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -248,9 +248,24 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (uuid != UUIDHelpers::Nil) settings.ostr << (settings.hilite ? hilite_keyword : "") << " UUID " << (settings.hilite ? hilite_none : "") << quoteString(toString(uuid)); - if (live_view_timeout) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH TIMEOUT " << (settings.hilite ? hilite_none : "") - << *live_view_timeout; + + if (live_view_timeout || live_view_periodic_refresh) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH"; + + if (live_view_timeout) + settings.ostr << " TIMEOUT " << (settings.hilite ? hilite_none : "") << *live_view_timeout; + + if (live_view_periodic_refresh) + { + if (live_view_timeout) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AND" << (settings.hilite ? hilite_none : ""); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? 
hilite_none : "") + << *live_view_periodic_refresh; + } + } + formatOnCluster(settings); } else diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 5d69d86bd61..a75df184842 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -76,6 +76,8 @@ public: ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.) std::optional live_view_timeout; /// For CREATE LIVE VIEW ... WITH TIMEOUT ... + std::optional live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ... + bool attach_short_syntax{false}; /** Get the text that identifies this element. */ diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 55208ca4133..a0bddabcc1d 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -496,10 +496,14 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr as_table; ASTPtr select; ASTPtr live_view_timeout; + ASTPtr live_view_periodic_refresh; String cluster_str; bool attach = false; bool if_not_exists = false; + bool with_and = false; + bool with_timeout = false; + bool with_periodic_refresh = false; if (!s_create.ignore(pos, expected)) { @@ -521,10 +525,35 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!table_name_p.parse(pos, table, expected)) return false; - if (ParserKeyword{"WITH TIMEOUT"}.ignore(pos, expected)) + if (ParserKeyword{"WITH"}.ignore(pos, expected)) { - if (!ParserNumber{}.parse(pos, live_view_timeout, expected)) - live_view_timeout = std::make_shared(static_cast(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC)); + if (ParserKeyword{"TIMEOUT"}.ignore(pos, expected)) + { + if (!ParserNumber{}.parse(pos, live_view_timeout, expected)) + { + live_view_timeout = std::make_shared(static_cast(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC)); + } + + /// Optional - AND + if (ParserKeyword{"AND"}.ignore(pos, expected)) + with_and = true; + + with_timeout = true; + } + + if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected)) + { + if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected)) + live_view_periodic_refresh = std::make_shared(static_cast(DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC)); + + with_periodic_refresh = true; + } + + else if (with_and) + return false; + + if (!with_timeout && !with_periodic_refresh) + return false; } if (ParserKeyword{"ON"}.ignore(pos, expected)) @@ -583,6 +612,9 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (live_view_timeout) query->live_view_timeout.emplace(live_view_timeout->as().value.safeGet()); + if (live_view_periodic_refresh) + query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as().value.safeGet()); + return true; } diff --git a/src/Storages/LiveView/LiveViewBlockOutputStream.h b/src/Storages/LiveView/LiveViewBlockOutputStream.h index 548bcf1b86a..5a1f75a8c2f 100644 --- a/src/Storages/LiveView/LiveViewBlockOutputStream.h +++ b/src/Storages/LiveView/LiveViewBlockOutputStream.h @@ -34,6 +34,7 @@ public: { new_blocks_metadata->hash = key_str; new_blocks_metadata->version = storage.getBlocksVersion() + 1; + new_blocks_metadata->time = std::chrono::system_clock::now(); for (auto & block : *new_blocks) { @@ -48,6 +49,15 @@ public: storage.condition.notify_all(); } + else + { + // only update blocks time + new_blocks_metadata->hash = storage.getBlocksHashKey(); + new_blocks_metadata->version = 
storage.getBlocksVersion(); + new_blocks_metadata->time = std::chrono::system_clock::now(); + + (*storage.blocks_metadata_ptr) = new_blocks_metadata; + } new_blocks.reset(); new_blocks_metadata.reset(); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 7095357a161..48dcab56f8c 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,6 +21,7 @@ limitations under the License. */ #include #include #include +#include #include #include @@ -254,6 +255,8 @@ StorageLiveView::StorageLiveView( live_view_context = std::make_unique(global_context); live_view_context->makeQueryContext(); + log = &Poco::Logger::get("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")"); + StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); @@ -275,12 +278,21 @@ StorageLiveView::StorageLiveView( if (query.live_view_timeout) { is_temporary = true; - temporary_live_view_timeout = std::chrono::seconds{*query.live_view_timeout}; + temporary_live_view_timeout = Seconds {*query.live_view_timeout}; + } + + if (query.live_view_periodic_refresh) + { + is_periodically_refreshed = true; + periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh}; } blocks_ptr = std::make_shared(); blocks_metadata_ptr = std::make_shared(); active_ptr = std::make_shared(true); + + periodic_refresh_task = global_context.getSchedulePool().createTask("LieViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); }); + periodic_refresh_task->deactivate(); } Block StorageLiveView::getHeader() const @@ -364,10 +376,20 @@ bool StorageLiveView::getNewBlocks() } new_blocks_metadata->hash = key.toHexString(); new_blocks_metadata->version = getBlocksVersion() + 1; + new_blocks_metadata->time = std::chrono::system_clock::now(); + (*blocks_ptr) = new_blocks; (*blocks_metadata_ptr) = new_blocks_metadata; + updated = true; } + else { + new_blocks_metadata->hash = getBlocksHashKey(); + new_blocks_metadata->version = getBlocksVersion(); + new_blocks_metadata->time = std::chrono::system_clock::now(); + + (*blocks_metadata_ptr) = new_blocks_metadata; + } } return updated; } @@ -387,11 +409,18 @@ void StorageLiveView::startup() { if (is_temporary) TemporaryLiveViewCleaner::instance().addView(std::static_pointer_cast(shared_from_this())); + + if (is_periodically_refreshed) + periodic_refresh_task->activate(); } void StorageLiveView::shutdown() { shutdown_called = true; + + if (is_periodically_refreshed) + periodic_refresh_task->deactivate(); + DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID()); } @@ -410,15 +439,55 @@ void StorageLiveView::drop() condition.notify_all(); } -void StorageLiveView::refresh() +void StorageLiveView::scheduleNextPeriodicRefresh() +{ + Seconds current_time = std::chrono::duration_cast (std::chrono::system_clock::now().time_since_epoch()); + Seconds blocks_time = std::chrono::duration_cast (getBlocksTime().time_since_epoch()); + + if ( (current_time - periodic_live_view_refresh) >= blocks_time ) + { + refresh(false); + blocks_time = std::chrono::duration_cast (getBlocksTime().time_since_epoch()); + } + current_time = std::chrono::duration_cast (std::chrono::system_clock::now().time_since_epoch()); + + auto next_refresh_time = blocks_time + periodic_live_view_refresh; + + if (current_time >= next_refresh_time) + periodic_refresh_task->scheduleAfter(0); + else + { + auto schedule_time = 
std::chrono::duration_cast (next_refresh_time - current_time); + periodic_refresh_task->scheduleAfter(static_cast(schedule_time.count())); + } +} + +void StorageLiveView::periodicRefreshTaskFunc() +{ + LOG_TRACE(log, "periodic refresh task"); + + std::lock_guard lock(mutex); + + if (hasActiveUsers()) + scheduleNextPeriodicRefresh(); +} + +void StorageLiveView::refresh(bool grab_lock) { // Lock is already acquired exclusively from InterperterAlterQuery.cpp InterpreterAlterQuery::execute() method. // So, reacquiring lock is not needed and will result in an exception. + + if (grab_lock) { std::lock_guard lock(mutex); if (getNewBlocks()) condition.notify_all(); } + else + { + if (getNewBlocks()) + condition.notify_all(); + } } Pipe StorageLiveView::read( @@ -430,15 +499,21 @@ Pipe StorageLiveView::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { + std::lock_guard lock(mutex); + + if (!(*blocks_ptr)) + refresh(false); + + else if (is_periodically_refreshed) { - std::lock_guard lock(mutex); - if (!(*blocks_ptr)) - { - if (getNewBlocks()) - condition.notify_all(); - } - return Pipe(std::make_shared(blocks_ptr, getHeader())); + Seconds current_time = std::chrono::duration_cast (std::chrono::system_clock::now().time_since_epoch()); + Seconds blocks_time = std::chrono::duration_cast (getBlocksTime().time_since_epoch()); + + if ( (current_time - periodic_live_view_refresh) >= blocks_time ) + refresh(false); } + + return Pipe(std::make_shared(blocks_ptr, getHeader())); } BlockInputStreams StorageLiveView::watch( @@ -453,6 +528,7 @@ BlockInputStreams StorageLiveView::watch( bool has_limit = false; UInt64 limit = 0; + BlockInputStreamPtr reader; if (query.limit_length) { @@ -461,45 +537,28 @@ BlockInputStreams StorageLiveView::watch( } if (query.is_watch_events) - { - auto reader = std::make_shared( + reader = std::make_shared( std::static_pointer_cast(shared_from_this()), blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit, context.getSettingsRef().live_view_heartbeat_interval.totalSeconds()); - - { - std::lock_guard lock(mutex); - if (!(*blocks_ptr)) - { - if (getNewBlocks()) - condition.notify_all(); - } - } - - processed_stage = QueryProcessingStage::Complete; - - return { reader }; - } else - { - auto reader = std::make_shared( + reader = std::make_shared( std::static_pointer_cast(shared_from_this()), blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit, context.getSettingsRef().live_view_heartbeat_interval.totalSeconds()); - { - std::lock_guard lock(mutex); - if (!(*blocks_ptr)) - { - if (getNewBlocks()) - condition.notify_all(); - } - } + { + std::lock_guard lock(mutex); - processed_stage = QueryProcessingStage::Complete; + if (!(*blocks_ptr)) + refresh(false); - return { reader }; + if (is_periodically_refreshed) + scheduleNextPeriodicRefresh(); } + + processed_stage = QueryProcessingStage::Complete; + return { reader }; } NamesAndTypesList StorageLiveView::getVirtuals() const diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fe4be6ee08e..4a219431c0d 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -13,6 +13,7 @@ limitations under the License. */ #include #include +#include #include #include @@ -21,10 +22,16 @@ limitations under the License. 
*/ namespace DB { +using Time = std::chrono::time_point; +using Seconds = std::chrono::seconds; +using MilliSeconds = std::chrono::milliseconds; + + struct BlocksMetadata { String hash; UInt64 version; + Time time; }; struct MergeableBlocks @@ -75,8 +82,10 @@ public: NamesAndTypesList getVirtuals() const override; bool isTemporary() const { return is_temporary; } - std::chrono::seconds getTimeout() const { return temporary_live_view_timeout; } + bool isPeriodicallyRefreshed() const { return is_periodically_refreshed; } + Seconds getTimeout() const { return temporary_live_view_timeout; } + Seconds getPeriodicRefresh() const { return periodic_live_view_refresh; } /// Check if we have any readers /// must be called with mutex locked @@ -109,6 +118,15 @@ public: return 0; } + /// Get blocks time + /// must be called with mutex locked + Time getBlocksTime() + { + if (*blocks_metadata_ptr) + return (*blocks_metadata_ptr)->time; + return {}; + } + /// Reset blocks /// must be called with mutex locked void reset() @@ -124,7 +142,7 @@ public: void startup() override; void shutdown() override; - void refresh(); + void refresh(const bool grab_lock = true); Pipe read( const Names & column_names, @@ -176,8 +194,13 @@ private: Context & global_context; std::unique_ptr live_view_context; + Poco::Logger * log; + bool is_temporary = false; - std::chrono::seconds temporary_live_view_timeout; + bool is_periodically_refreshed = false; + + Seconds temporary_live_view_timeout; + Seconds periodic_live_view_refresh; /// Mutex to protect access to sample block and inner_blocks_query mutable std::mutex sample_block_lock; @@ -199,6 +222,13 @@ private: std::atomic shutdown_called = false; + /// Periodic refresh task used when [PERIODIC] REFRESH is specified in create statement + BackgroundSchedulePool::TaskHolder periodic_refresh_task; + void periodicRefreshTaskFunc(); + + /// Must be called with mutex locked + void scheduleNextPeriodicRefresh(); + StorageLiveView( const StorageID & table_id_, Context & local_context, From 8ea4c2e26fbf51e8aa59f6ea3bf4e9b366182d67 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 24 Sep 2020 14:42:41 +0300 Subject: [PATCH 002/697] Fix TTL in cases, when its expression is a function and is the same as ORDER BY key --- src/DataStreams/TTLBlockInputStream.cpp | 56 ++++++------- .../MergeTree/MergeTreeDataWriter.cpp | 42 ++++------ src/Storages/TTLDescription.cpp | 49 ++---------- .../01506_ttl_same_with_order_by.reference | 4 + .../01506_ttl_same_with_order_by.sql | 78 +++++++++++++++++++ 5 files changed, 128 insertions(+), 101 deletions(-) create mode 100644 tests/queries/0_stateless/01506_ttl_same_with_order_by.reference create mode 100644 tests/queries/0_stateless/01506_ttl_same_with_order_by.sql diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 85d9c7fead2..6dba8968f79 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -86,6 +86,7 @@ TTLBlockInputStream::TTLBlockInputStream( if (descr.arguments.empty()) for (const auto & name : descr.argument_names) descr.arguments.push_back(header.getPositionByName(name)); + agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size()); const Settings & settings = storage.global_context.getSettingsRef(); @@ -153,19 +154,26 @@ void TTLBlockInputStream::readSuffixImpl() LOG_INFO(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name); } +static ColumnPtr extractRequieredColumn(const ExpressionActions & 
expression, const Block & block, const String & result_column) +{ + if (block.has(result_column)) + return block.getByName(result_column).column; + + Block block_copy; + for (const auto & column_name : expression.getRequiredColumns()) + block_copy.insert(block.getByName(column_name)); + + expression.execute(block_copy); + return block_copy.getByName(result_column).column; +} + void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) { auto rows_ttl = metadata_snapshot->getRowsTTL(); + auto ttl_column = extractRequieredColumn(*rows_ttl.expression, block, rows_ttl.result_column); - rows_ttl.expression->execute(block); - if (rows_ttl.where_expression) - rows_ttl.where_expression->execute(block); - - const IColumn * ttl_column = - block.getByName(rows_ttl.result_column).column.get(); - - const IColumn * where_result_column = rows_ttl.where_expression ? - block.getByName(rows_ttl.where_result_column).column.get() : nullptr; + auto where_result_column = rows_ttl.where_expression ? + extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column) : nullptr; const auto & column_names = header.getNames(); @@ -181,7 +189,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) for (size_t i = 0; i < block.rows(); ++i) { - UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); bool where_filter_passed = !where_result_column || where_result_column->getBool(i); if (!isTTLExpired(cur_ttl) || !where_filter_passed) { @@ -206,7 +214,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); for (size_t i = 0; i < block.rows(); ++i) { - UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); bool where_filter_passed = !where_result_column || where_result_column->getBool(i); bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; @@ -221,6 +229,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) same_as_current = false; } } + if (!same_as_current) { if (rows_with_current_key) @@ -311,7 +320,6 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) defaults_expression->execute(block_with_defaults); } - std::vector columns_to_remove; for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) { /// If we read not all table columns. E.g. while mutation. 
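// The extractRequieredColumn helper added in this patch evaluates a TTL expression on a copy of
// the block when the result column is not already present, so the result no longer has to be
// inserted into and later erased from the block being processed (the columns_to_remove /
// block.erase bookkeeping removed elsewhere in this diff). A sketch of the schema this is meant
// to fix, mirroring the test added later in this patch (table and column names are just those of
// the test, not required ones):
//
//   CREATE TABLE derived_metrics_local
//   (
//       timestamp DateTime,
//       bytes UInt64
//   )
//   ENGINE = SummingMergeTree()
//   PARTITION BY toYYYYMMDD(timestamp)
//   ORDER BY (toStartOfHour(timestamp), timestamp)
//   TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR
//       GROUP BY toStartOfHour(timestamp) SET bytes = max(bytes);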
@@ -329,11 +337,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) if (isTTLExpired(old_ttl_info.max)) continue; - if (!block.has(ttl_entry.result_column)) - { - columns_to_remove.push_back(ttl_entry.result_column); - ttl_entry.expression->execute(block); - } + auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column); ColumnPtr default_column = nullptr; if (block_with_defaults.has(name)) @@ -344,11 +348,9 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) MutableColumnPtr result_column = values_column->cloneEmpty(); result_column->reserve(block.rows()); - const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get(); - for (size_t i = 0; i < block.rows(); ++i) { - UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); if (isTTLExpired(cur_ttl)) { if (default_column) @@ -365,34 +367,24 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) } column_with_type.column = std::move(result_column); } - - for (const String & column : columns_to_remove) - block.erase(column); } void TTLBlockInputStream::updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map) { - std::vector columns_to_remove; for (const auto & ttl_entry : descriptions) { auto & new_ttl_info = ttl_info_map[ttl_entry.result_column]; if (!block.has(ttl_entry.result_column)) - { - columns_to_remove.push_back(ttl_entry.result_column); ttl_entry.expression->execute(block); - } - const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get(); + auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column); for (size_t i = 0; i < block.rows(); ++i) { - UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); new_ttl_info.update(cur_ttl); } } - - for (const String & column : columns_to_remove) - block.erase(column); } void TTLBlockInputStream::updateMovesTTL(Block & block) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 739aff31a06..d5a2bfe280e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -84,19 +84,14 @@ void updateTTL( const TTLDescription & ttl_entry, IMergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, - Block & block, + const Block & block, bool update_part_min_max_ttls) { - bool remove_column = false; - if (!block.has(ttl_entry.result_column)) - { - ttl_entry.expression->execute(block); - remove_column = true; - } + Block block_copy = block; + if (!block_copy.has(ttl_entry.result_column)) + ttl_entry.expression->execute(block_copy); - const auto & current = block.getByName(ttl_entry.result_column); - - const IColumn * column = current.column.get(); + const IColumn * column = block_copy.getByName(ttl_entry.result_column).column.get(); if (const ColumnUInt16 * column_date = typeid_cast(column)) { const auto & date_lut = DateLUT::instance(); @@ -127,9 +122,6 @@ void updateTTL( if (update_part_min_max_ttls) ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max); - - if (remove_column) - block.erase(ttl_entry.result_column); } } @@ -271,6 +263,18 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa sync_guard.emplace(disk, full_path); } + if (metadata_snapshot->hasRowsTTL()) + 
updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); + + for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) + updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); + + const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); + for (const auto & ttl_entry : recompression_ttl_entries) + updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false); + + new_data_part->ttl_infos.update(move_ttl_infos); + /// If we need to calculate some columns to sort. if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot)->execute(block); @@ -299,18 +303,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - if (metadata_snapshot->hasRowsTTL()) - updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); - - for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); - - const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); - for (const auto & ttl_entry : recompression_ttl_entries) - updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false); - - new_data_part->ttl_infos.update(move_ttl_infos); - /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. 
auto compression_codec = data.global_context.chooseCompressionCodec(0, 0); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 7f55badf819..7499f1de292 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -10,6 +10,8 @@ #include #include +#include + #include #include @@ -196,59 +198,20 @@ TTLDescription TTLDescription::getTTLFromAST( ErrorCodes::BAD_TTL_EXPRESSION); } - for (const auto & [name, value] : ttl_element->group_by_aggregations) - { - if (primary_key_columns_set.count(name)) - throw Exception( - "Can not set custom aggregation for column in primary key in TTL Expression", - ErrorCodes::BAD_TTL_EXPRESSION); - + for (const auto & [name, _] : ttl_element->group_by_aggregations) aggregation_columns_set.insert(name); - } if (aggregation_columns_set.size() != ttl_element->group_by_aggregations.size()) throw Exception( "Multiple aggregations set for one column in TTL Expression", ErrorCodes::BAD_TTL_EXPRESSION); - result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size()); - auto aggregations = ttl_element->group_by_aggregations; - for (size_t i = 0; i < pk_columns.size(); ++i) + for (const auto & column : columns.getOrdinary()) { - ASTPtr value = primary_key.expression_list_ast->children[i]->clone(); - - if (i >= ttl_element->group_by_key.size()) - { - ASTPtr value_max = makeASTFunction("max", value->clone()); - aggregations.emplace_back(value->getColumnName(), std::move(value_max)); - } - - if (value->as()) - { - auto syntax_result = TreeRewriter(context).analyze(value, columns.getAllPhysical(), {}, {}, true); - auto expr_actions = ExpressionAnalyzer(value, syntax_result, context).getActions(false); - for (const auto & column : expr_actions->getRequiredColumns()) - { - if (i < ttl_element->group_by_key.size()) - { - ASTPtr expr = makeASTFunction("any", std::make_shared(column)); - aggregations.emplace_back(column, std::move(expr)); - } - else - { - ASTPtr expr = makeASTFunction("argMax", std::make_shared(column), value->clone()); - aggregations.emplace_back(column, std::move(expr)); - } - } - } - } - - for (const auto & column : columns.getAllPhysical()) - { - if (!primary_key_columns_set.count(column.name) && !aggregation_columns_set.count(column.name)) + if (!aggregation_columns_set.count(column.name)) { ASTPtr expr = makeASTFunction("any", std::make_shared(column.name)); aggregations.emplace_back(column.name, std::move(expr)); @@ -280,8 +243,6 @@ TTLDescription TTLDescription::getTTLFromAST( } checkTTLExpression(result.expression, result.result_column); - - return result; } diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference new file mode 100644 index 00000000000..f8f36434a82 --- /dev/null +++ b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference @@ -0,0 +1,4 @@ +2020-01-01 00:00:00 3 +2020-01-01 00:00:00 2020-01-01 00:00:00 111 +1 +0 diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql new file mode 100644 index 00000000000..7a0fb86330b --- /dev/null +++ b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 +) +ENGINE=SummingMergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (toStartOfHour(timestamp), timestamp) +TTL 
toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY toStartOfHour(timestamp) +SET bytes=max(bytes); + +INSERT INTO derived_metrics_local values('2020-01-01 00:00:00', 1); +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 3); +INSERT INTO derived_metrics_local values('2020-01-01 00:02:00', 2); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT * FROM derived_metrics_local; + +DROP TABLE derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + timestamp_h DateTime materialized toStartOfHour(timestamp), + bytes UInt64 +) +ENGINE=SummingMergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (timestamp_h, timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY timestamp_h +SET bytes=max(bytes), timestamp = toStartOfHour(any(timestamp)); + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111); +INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22); +INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT timestamp, timestamp_h, bytes FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR +) +ENGINE=MergeTree() +ORDER BY (toStartOfHour(timestamp), timestamp) +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111) ('2020-01-01 00:19:22', 22) ('2100-01-01 00:19:22', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT sum(bytes) FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 +) +ENGINE=MergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (toStartOfHour(timestamp), timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111); +INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22); +INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT count() FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; From 0d79474acc3cfb6f2c8dfbed26aa5d5f0346fc4f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 30 Sep 2020 19:10:15 +0300 Subject: [PATCH 003/697] Fix TTL with GROUP BY and fix test according to new logic --- src/DataStreams/TTLBlockInputStream.cpp | 184 ++++++++++-------- src/DataStreams/TTLBlockInputStream.h | 7 +- src/Storages/TTLDescription.cpp | 17 +- .../01280_ttl_where_group_by.reference | 16 +- .../0_stateless/01280_ttl_where_group_by.sh | 8 +- .../01280_ttl_where_group_by_negative.sql | 3 - 6 files changed, 134 insertions(+), 101 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 6dba8968f79..5c49b9f11c2 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -88,7 +88,6 @@ TTLBlockInputStream::TTLBlockInputStream( descr.arguments.push_back(header.getPositionByName(name)); agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size()); - const Settings & settings = storage.global_context.getSettingsRef(); Aggregator::Params params(header, keys, aggregates, @@ -108,14 +107,15 @@ Block TTLBlockInputStream::readImpl() { /// Skip all data if table ttl is expired for part auto storage_rows_ttl = 
metadata_snapshot->getRowsTTL(); - if (metadata_snapshot->hasRowsTTL() && !storage_rows_ttl.where_expression && storage_rows_ttl.mode != TTLMode::GROUP_BY + if (metadata_snapshot->hasRowsTTL() + && !storage_rows_ttl.where_expression + && storage_rows_ttl.mode != TTLMode::GROUP_BY && isTTLExpired(old_ttl_infos.table_ttl.max)) { rows_removed = data_part->rows_count; return {}; } - Block block = children.at(0)->read(); if (!block) { @@ -130,10 +130,9 @@ Block TTLBlockInputStream::readImpl() } if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min))) - removeRowsWithExpiredTableTTL(block); + executeRowsTTL(block); removeValuesWithExpiredColumnTTL(block); - updateMovesTTL(block); updateRecompressionTTL(block); @@ -167,107 +166,117 @@ static ColumnPtr extractRequieredColumn(const ExpressionActions & expression, co return block_copy.getByName(result_column).column; } -void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) +void TTLBlockInputStream::executeRowsTTL(Block & block) { auto rows_ttl = metadata_snapshot->getRowsTTL(); auto ttl_column = extractRequieredColumn(*rows_ttl.expression, block, rows_ttl.result_column); auto where_result_column = rows_ttl.where_expression ? - extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column) : nullptr; + extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column): nullptr; + if (aggregator) + aggregateRowsWithExpiredTTL(block, ttl_column, where_result_column); + else + removeRowsWithExpiredTTL(block, ttl_column, where_result_column); +} + +void TTLBlockInputStream::removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column) +{ + MutableColumns result_columns; const auto & column_names = header.getNames(); - if (!aggregator) + result_columns.reserve(column_names.size()); + for (auto it = column_names.begin(); it != column_names.end(); ++it) { - MutableColumns result_columns; - result_columns.reserve(column_names.size()); - for (auto it = column_names.begin(); it != column_names.end(); ++it) - { - const IColumn * values_column = block.getByName(*it).column.get(); - MutableColumnPtr result_column = values_column->cloneEmpty(); - result_column->reserve(block.rows()); + const IColumn * values_column = block.getByName(*it).column.get(); + MutableColumnPtr result_column = values_column->cloneEmpty(); + result_column->reserve(block.rows()); - for (size_t i = 0; i < block.rows(); ++i) - { - UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - bool where_filter_passed = !where_result_column || where_result_column->getBool(i); - if (!isTTLExpired(cur_ttl) || !where_filter_passed) - { - new_ttl_infos.table_ttl.update(cur_ttl); - result_column->insertFrom(*values_column, i); - } - else if (it == column_names.begin()) - ++rows_removed; - } - result_columns.emplace_back(std::move(result_column)); - } - block = header.cloneWithColumns(std::move(result_columns)); - } - else - { - MutableColumns result_columns = header.cloneEmptyColumns(); - MutableColumns aggregate_columns = header.cloneEmptyColumns(); - - size_t rows_aggregated = 0; - size_t current_key_start = 0; - size_t rows_with_current_key = 0; - auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); for (size_t i = 0; i < block.rows(); ++i) { UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - bool where_filter_passed = !where_result_column || where_result_column->getBool(i); - bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; - - bool 
same_as_current = true; - for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j) - { - const String & key_column = storage_rows_ttl.group_by_keys[j]; - const IColumn * values_column = block.getByName(key_column).column.get(); - if (!same_as_current || (*values_column)[i] != current_key_value[j]) - { - values_column->get(i, current_key_value[j]); - same_as_current = false; - } - } - - if (!same_as_current) - { - if (rows_with_current_key) - calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); - finalizeAggregates(result_columns); - - current_key_start = rows_aggregated; - rows_with_current_key = 0; - } - - if (ttl_expired) - { - ++rows_with_current_key; - ++rows_aggregated; - for (const auto & name : column_names) - { - const IColumn * values_column = block.getByName(name).column.get(); - auto & column = aggregate_columns[header.getPositionByName(name)]; - column->insertFrom(*values_column, i); - } - } - else + bool where_filter_passed = !where_column || where_column->getBool(i); + if (!isTTLExpired(cur_ttl) || !where_filter_passed) { new_ttl_infos.table_ttl.update(cur_ttl); - for (const auto & name : column_names) - { - const IColumn * values_column = block.getByName(name).column.get(); - auto & column = result_columns[header.getPositionByName(name)]; - column->insertFrom(*values_column, i); - } + result_column->insertFrom(*values_column, i); + } + else if (it == column_names.begin()) + ++rows_removed; + } + + result_columns.emplace_back(std::move(result_column)); + } + + block = header.cloneWithColumns(std::move(result_columns)); +} + +void TTLBlockInputStream::aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column) +{ + const auto & column_names = header.getNames(); + MutableColumns result_columns = header.cloneEmptyColumns(); + MutableColumns aggregate_columns = header.cloneEmptyColumns(); + + size_t rows_aggregated = 0; + size_t current_key_start = 0; + size_t rows_with_current_key = 0; + auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); + bool where_filter_passed = !where_column || where_column->getBool(i); + bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; + + bool same_as_current = true; + for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j) + { + const String & key_column = storage_rows_ttl.group_by_keys[j]; + const IColumn * values_column = block.getByName(key_column).column.get(); + if (!same_as_current || (*values_column)[i] != current_key_value[j]) + { + values_column->get(i, current_key_value[j]); + same_as_current = false; } } - if (rows_with_current_key) - calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + if (!same_as_current) + { + if (rows_with_current_key) + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + finalizeAggregates(result_columns); - block = header.cloneWithColumns(std::move(result_columns)); + current_key_start = rows_aggregated; + rows_with_current_key = 0; + } + + if (ttl_expired) + { + ++rows_with_current_key; + ++rows_aggregated; + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = aggregate_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } + else + { + new_ttl_infos.table_ttl.update(cur_ttl); + for (const auto & name : column_names) + { + const IColumn 
* values_column = block.getByName(name).column.get(); + auto & column = result_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } } + + if (rows_with_current_key) + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + + block = header.cloneWithColumns(std::move(result_columns)); } void TTLBlockInputStream::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length) @@ -294,12 +303,14 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns) { for (const auto & it : storage_rows_ttl.set_parts) it.expression->execute(agg_block); + for (const auto & name : storage_rows_ttl.group_by_keys) { const IColumn * values_column = agg_block.getByName(name).column.get(); auto & result_column = result_columns[header.getPositionByName(name)]; result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); } + for (const auto & it : storage_rows_ttl.set_parts) { const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get(); @@ -308,6 +319,7 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns) } } } + agg_result.invalidate(); } diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index 1d3b69f61c5..bbe1f8782a4 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -67,8 +67,13 @@ private: /// Removes values with expired ttl and computes new_ttl_infos and empty_columns for part void removeValuesWithExpiredColumnTTL(Block & block); + void executeRowsTTL(Block & block); + /// Removes rows with expired table ttl and computes new ttl_infos for part - void removeRowsWithExpiredTableTTL(Block & block); + void removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column); + + /// Aggregates rows with expired table ttl and computes new ttl_infos for part + void aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column); // Calculate aggregates of aggregate_columns into agg_result void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 7499f1de292..e412653a972 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -184,11 +184,8 @@ TTLDescription TTLDescription::getTTLFromAST( if (ttl_element->group_by_key.size() > pk_columns.size()) throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION); - NameSet primary_key_columns_set(pk_columns.begin(), pk_columns.end()); NameSet aggregation_columns_set; - - for (const auto & column : primary_key.expression->getRequiredColumns()) - primary_key_columns_set.insert(column); + NameSet used_primary_key_columns_set; for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i) { @@ -196,6 +193,8 @@ TTLDescription TTLDescription::getTTLFromAST( throw Exception( "TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION); + + used_primary_key_columns_set.insert(pk_columns[i]); } for (const auto & [name, _] : ttl_element->group_by_aggregations) @@ -209,9 +208,17 @@ TTLDescription TTLDescription::getTTLFromAST( result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size()); auto aggregations = ttl_element->group_by_aggregations; + const auto & primary_key_expressions = 
primary_key.expression_list_ast->children; + for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i) + { + ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone()); + aggregations.emplace_back(pk_columns[i], std::move(expr)); + aggregation_columns_set.insert(pk_columns[i]); + } + for (const auto & column : columns.getOrdinary()) { - if (!aggregation_columns_set.count(column.name)) + if (!aggregation_columns_set.count(column.name) && !used_primary_key_columns_set.count(column.name)) { ASTPtr expr = makeASTFunction("any", std::make_shared(column.name)); aggregations.emplace_back(column.name, std::move(expr)); diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference index ad20d38f2e6..7fe00709dee 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.reference +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference @@ -1,20 +1,26 @@ +ttl_01280_1 1 1 0 4 1 2 3 7 1 3 0 5 2 1 0 1 2 1 20 1 +ttl_01280_2 1 1 [0,2,3] 4 1 1 [5,4,1] 13 1 3 [1,0,1,0] 17 2 1 [3,1,0,3] 8 3 1 [2,4,5] 8 +ttl_01280_3 1 1 0 4 -1 3 10 6 +1 1 10 6 2 1 0 3 -3 5 8 2 +3 1 8 2 +ttl_01280_4 1 1 0 4 -3 3 13 9 +10 2 13 9 +ttl_01280_5 1 2 7 5 2 3 6 5 -1 2 3 5 -2 3 3 5 +ttl_01280_6 +1 5 3 5 +2 10 3 5 diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh index 9b05606f928..531f2951d36 100755 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.sh +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh @@ -13,6 +13,7 @@ function optimize() done } +echo "ttl_01280_1" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_1 (a Int, b Int, x Int, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second delete where x % 10 == 0 and y > 5; insert into ttl_01280_1 values (1, 1, 0, 4, now() + 10); @@ -29,6 +30,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_1 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_2" +echo "ttl_01280_2" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_2 (a Int, b Int, x Array(Int32), y Double, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set x = minForEach(x), y = sum(y), d = max(d); insert into ttl_01280_2 values (1, 1, array(0, 2, 3), 4, now() + 10); @@ -47,6 +49,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_2 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_3" +echo "ttl_01280_3" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set x = argMax(x, d), y = argMax(y, d), d = max(d); insert into ttl_01280_3 values (1, 1, 0, 4, now() + 10); @@ -65,6 +68,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_3 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_4" +echo "ttl_01280_4" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_4 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), -(a + b)) ttl d + interval 1 second group by toDate(d) set x = sum(x), y = max(y); insert into ttl_01280_4 values (1, 1, 0, 4, now() + 10); @@ -79,7 +83,8 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_4 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_5" -$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a 
Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x); +echo "ttl_01280_5" +$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x), b = argMax(b, -b); insert into ttl_01280_5 values (1, 2, 3, 5, now()); insert into ttl_01280_5 values (2, 10, 1, 5, now()); insert into ttl_01280_5 values (2, 3, 5, 5, now()); @@ -91,6 +96,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_5 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_6" +echo "ttl_01280_6" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_6 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a; insert into ttl_01280_6 values (1, 2, 3, 5, now()); diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql index f2c26a3d495..b273e065bcc 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql +++ b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql @@ -1,7 +1,4 @@ create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by x set y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by b set y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b, x set y = max(y); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set y = max(y), y = max(y); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a) ttl d + interval 1 second group by toDate(d), a set d = min(d), b = max(b); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (d, -(a + b)) ttl d + interval 1 second group by d, -(a + b) set a = sum(a), b = min(b); -- { serverError 450} From 279853b16a2bca4f931be0237fff97c5ba702b1f Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 20 Nov 2020 02:02:18 +0400 Subject: [PATCH 004/697] WIP: Implement group extraction and role mapping --- src/Access/AccessControlManager.cpp | 2 +- src/Access/Authentication.cpp | 20 +- src/Access/Authentication.h | 7 + src/Access/LDAPAccessStorage.cpp | 359 ++++++++++++++++++++++++---- src/Access/LDAPAccessStorage.h | 20 +- src/Access/LDAPClient.cpp | 217 +++++++++++++++-- src/Access/LDAPClient.h | 5 +- src/Access/LDAPParams.h | 35 +++ 8 files changed, 588 insertions(+), 77 deletions(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index a95d65ebb59..5aa9699d96f 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -292,7 +292,7 @@ void 
AccessControlManager::addStoragesFromUserDirectoriesConfig( else if (type == "ldap") type = LDAPAccessStorage::STORAGE_TYPE; - String name = config.getString(prefix + ".name", type); + String name = config.getString(prefix + ".name", key_in_user_directories); if (type == MemoryAccessStorage::STORAGE_TYPE) { diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index d29e2f897e8..27a62e3a2af 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -82,12 +82,7 @@ bool Authentication::isCorrectPassword(const String & password_, const String & case LDAP_SERVER: { - auto ldap_server_params = external_authenticators.getLDAPServerParams(server_name); - ldap_server_params.user = user_; - ldap_server_params.password = password_; - - LDAPSimpleAuthClient ldap_client(ldap_server_params); - return ldap_client.check(); + return isCorrectPasswordLDAP(password_, user_, external_authenticators); } case MAX_TYPE: @@ -96,4 +91,17 @@ bool Authentication::isCorrectPassword(const String & password_, const String & throw Exception("Cannot check if the password is correct for authentication type " + toString(type), ErrorCodes::NOT_IMPLEMENTED); } +bool Authentication::isCorrectPasswordLDAP(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators, const LDAPSearchParamsList * search_params, LDAPSearchResultsList * search_results) const +{ + if (type != LDAP_SERVER) + throw Exception("Cannot check if the password is correct using LDAP logic for authentication type " + toString(type), ErrorCodes::BAD_ARGUMENTS); + + auto ldap_server_params = external_authenticators.getLDAPServerParams(server_name); + ldap_server_params.user = user_; + ldap_server_params.password = password_; + + LDAPSimpleAuthClient ldap_client(ldap_server_params); + return ldap_client.authenticate(search_params, search_results); +} + } diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index 38714339221..87f320d6e1a 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace DB @@ -19,6 +21,10 @@ namespace ErrorCodes } class ExternalAuthenticators; +struct LDAPSearchParams; +using LDAPSearchParamsList = std::vector; +using LDAPSearchResults = std::set; +using LDAPSearchResultsList = std::vector; /// Authentication type and encrypted password for checking when an user logins. class Authentication @@ -90,6 +96,7 @@ public: /// Checks if the provided password is correct. Returns false if not. /// User name and external authenticators' info are used only by some specific authentication type (e.g., LDAP_SERVER). 
bool isCorrectPassword(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators) const; + bool isCorrectPasswordLDAP(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators, const LDAPSearchParamsList * search_params = nullptr, LDAPSearchResultsList * search_results = nullptr) const; friend bool operator ==(const Authentication & lhs, const Authentication & rhs) { return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash); } friend bool operator !=(const Authentication & lhs, const Authentication & rhs) { return !(lhs == rhs); } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 92de7fce8d7..7218c317b8c 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -9,8 +10,10 @@ #include #include #include +#include #include #include +#include #include @@ -44,6 +47,7 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m const bool has_server = config.has(prefix_str + "server"); const bool has_roles = config.has(prefix_str + "roles"); + const bool has_role_mapping = config.has(prefix_str + "role_mapping"); if (!has_server) throw Exception("Missing 'server' field for LDAP user directory.", ErrorCodes::BAD_ARGUMENTS); @@ -52,20 +56,75 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m if (ldap_server_cfg.empty()) throw Exception("Empty 'server' field for LDAP user directory.", ErrorCodes::BAD_ARGUMENTS); - std::set roles_cfg; + std::set common_roles_cfg; if (has_roles) { Poco::Util::AbstractConfiguration::Keys role_names; config.keys(prefix_str + "roles", role_names); // Currently, we only extract names of roles from the section names and assign them directly and unconditionally. 
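// For reference, the directory section parsed in this function is expected to have roughly the
// following shape (a sketch only: element names come from the keys read below, the <ldap> section
// is assumed to sit under user_directories as in AccessControlManager::addStoragesFromUserDirectoriesConfig,
// values shown are the defaults used by this parser, and "..." marks free-form placeholders):
//
//   <ldap>
//       <server>ldap_server_name</server>
//       <roles>
//           <some_locally_defined_role/>
//       </roles>
//       <role_mapping>
//           <base_dn>...</base_dn>
//           <attribute>cn</attribute>
//           <scope>subtree</scope>          <!-- base | one_level | subtree | children -->
//           <filter_prefix>...</filter_prefix>
//           <filter_suffix>...</filter_suffix>
//           <fail_if_all_rules_mismatch>true</fail_if_all_rules_mismatch>
//           <rule>
//               <match>.+</match>
//               <replace>$&</replace>
//               <continue_on_match>false</continue_on_match>
//           </rule>
//       </role_mapping>
//   </ldap>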
- roles_cfg.insert(role_names.begin(), role_names.end()); + common_roles_cfg.insert(role_names.begin(), role_names.end()); + } + + LDAPSearchParamsList role_search_params_cfg; + if (has_role_mapping) + { + Poco::Util::AbstractConfiguration::Keys all_keys; + config.keys(prefix, all_keys); + for (const auto & key : all_keys) + { + if (key != "role_mapping" && key.find("role_mapping[") != 0) + continue; + + const String rm_prefix = prefix_str + key; + const String rm_prefix_str = rm_prefix + '.'; + role_search_params_cfg.emplace_back(); + auto & rm_params = role_search_params_cfg.back(); + + rm_params.base_dn = config.getString(rm_prefix_str + "base_dn", ""); + rm_params.attribute = config.getString(rm_prefix_str + "attribute", "cn"); + rm_params.filter_prefix = config.getString(rm_prefix_str + "filter_prefix", ""); + rm_params.filter_suffix = config.getString(rm_prefix_str + "filter_suffix", ""); + rm_params.fail_if_all_rules_mismatch = config.getBool(rm_prefix_str + "fail_if_all_rules_mismatch", true); + + auto scope = config.getString(rm_prefix_str + "scope", "subtree"); + boost::algorithm::to_lower(scope); + if (scope == "base") rm_params.scope = LDAPSearchParams::Scope::BASE; + else if (scope == "one_level") rm_params.scope = LDAPSearchParams::Scope::ONE_LEVEL; + else if (scope == "subtree") rm_params.scope = LDAPSearchParams::Scope::SUBTREE; + else if (scope == "children") rm_params.scope = LDAPSearchParams::Scope::CHILDREN; + else + throw Exception("Invalid value of 'scope' field in '" + key + "' section of LDAP user directory, must be one of 'base', 'one_level', 'subtree', or 'children'.", ErrorCodes::BAD_ARGUMENTS); + + Poco::Util::AbstractConfiguration::Keys all_mapping_keys; + config.keys(rm_prefix, all_mapping_keys); + for (const auto & mkey : all_mapping_keys) + { + if (mkey != "rule" && mkey.find("rule[") != 0) + continue; + + const String rule_prefix = rm_prefix_str + mkey; + const String rule_prefix_str = rule_prefix + '.'; + rm_params.role_mapping_rules.emplace_back(); + auto & role_mapping_rule = rm_params.role_mapping_rules.back(); + + role_mapping_rule.match = config.getString(rule_prefix_str + "match", ".+"); + role_mapping_rule.replace = config.getString(rule_prefix_str + "replace", "$&"); + role_mapping_rule.continue_on_match = config.getBool(rule_prefix_str + "continue_on_match", false); + } + } } access_control_manager = access_control_manager_; ldap_server = ldap_server_cfg; - default_role_names.swap(roles_cfg); - roles_of_interest.clear(); + role_search_params.swap(role_search_params_cfg); + common_role_names.swap(common_roles_cfg); + + users_per_roles.clear(); + granted_role_names.clear(); + granted_role_ids.clear(); + external_role_hashes.clear(); + role_change_subscription = access_control_manager->subscribeForChanges( [this] (const UUID & id, const AccessEntityPtr & entity) { @@ -73,11 +132,14 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m } ); - /// Update `roles_of_interests` with initial values. - for (const auto & role_name : default_role_names) + // Update granted_role_* with the initial values: resolved ids of roles from common_role_names. 
+ for (const auto & role_name : common_role_names) { - if (auto role_id = access_control_manager->find(role_name)) - roles_of_interest.emplace(*role_id, role_name); + if (const auto role_id = access_control_manager->find(role_name)) + { + granted_role_names.insert_or_assign(*role_id, role_name); + granted_role_ids.insert_or_assign(role_name, *role_id); + } } } @@ -85,54 +147,263 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m void LDAPAccessStorage::processRoleChange(const UUID & id, const AccessEntityPtr & entity) { std::scoped_lock lock(mutex); - - /// Update `roles_of_interests`. auto role = typeid_cast>(entity); - bool need_to_update_users = false; + const auto it = granted_role_names.find(id); - if (role && default_role_names.count(role->getName())) + if (role) // Added or renamed role. { - /// If a role was created with one of the `default_role_names` or renamed to one of the `default_role_names`, - /// then set `need_to_update_users`. - need_to_update_users = roles_of_interest.insert_or_assign(id, role->getName()).second; + const auto & new_role_name = role->getName(); + if (it != granted_role_names.end()) + { + // Revoke the old role if its name has been changed. + const auto & old_role_name = it->second; + if (new_role_name != old_role_name) + { + applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + } + } + + // Grant the role. + applyRoleChangeNoLock(true /* grant */, id, new_role_name); } - else + else // Removed role. { - /// If a role was removed or renamed to a name which isn't contained in the `default_role_names`, - /// then set `need_to_update_users`. - need_to_update_users = roles_of_interest.erase(id) > 0; + if (it != granted_role_names.end()) + { + // Revoke the old role. + const auto & old_role_name = it->second; + applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + } + } +} + + +void LDAPAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name) +{ + std::vector user_ids; + + // Find relevant user ids. + if (common_role_names.count(role_name)) + { + user_ids = memory_storage.findAll(); + } + else { + const auto it = users_per_roles.find(role_name); + if (it != users_per_roles.end()) + { + const auto & user_names = it->second; + user_ids.reserve(user_names.size()); + for (const auto & user_name : user_names) + { + if (const auto user_id = memory_storage.find(user_name)) + user_ids.emplace_back(*user_id); + } + } } - /// Update users which have been created. - if (need_to_update_users) + // Update relevant users' granted roles. 
+ if (!user_ids.empty()) { - auto update_func = [this] (const AccessEntityPtr & entity_) -> AccessEntityPtr + auto update_func = [&role_id, &grant] (const AccessEntityPtr & entity_) -> AccessEntityPtr { if (auto user = typeid_cast>(entity_)) { auto changed_user = typeid_cast>(user->clone()); auto & granted_roles = changed_user->granted_roles.roles; - granted_roles.clear(); - boost::range::copy(roles_of_interest | boost::adaptors::map_keys, std::inserter(granted_roles, granted_roles.end())); + + if (grant) + granted_roles.insert(role_id); + else + granted_roles.erase(role_id); + return changed_user; } return entity_; }; - memory_storage.update(memory_storage.findAll(), update_func); + + memory_storage.update(user_ids, update_func); + + if (grant) + { + granted_role_names.insert_or_assign(role_id, role_name); + granted_role_ids.insert_or_assign(role_name, role_id); + } + else + { + granted_role_names.erase(role_id); + granted_role_ids.erase(role_name); + } } } -void LDAPAccessStorage::checkAllDefaultRoleNamesFoundNoLock() const +void LDAPAccessStorage::grantRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const { - boost::container::flat_set role_names_of_interest; - boost::range::copy(roles_of_interest | boost::adaptors::map_values, std::inserter(role_names_of_interest, role_names_of_interest.end())); + const auto & user_name = user.getName(); + const auto new_hash = boost::hash{}(external_roles); + auto & granted_roles = user.granted_roles.roles; - for (const auto & role_name : default_role_names) + // Map external role names to local role names. + const auto user_role_names = mapExternalRolesNoLock(user_name, external_roles); + + external_role_hashes.erase(user_name); + granted_roles.clear(); + + // Grant the common roles. + + // Initially, all the available ids of common roles were resolved in setConfiguration(), + // and, then, maintained by processRoleChange(), so here we just grant those that exist (i.e., resolved). + for (const auto & role_name : common_role_names) { - if (!role_names_of_interest.count(role_name)) - throwDefaultRoleNotFound(role_name); + const auto it = granted_role_ids.find(role_name); + if (it == granted_role_ids.end()) + { + LOG_WARNING(getLogger(), "Unable to grant common role '{}' to user '{}': role not found", role_name, user_name); + } + else + { + const auto & role_id = it->second; + granted_roles.insert(role_id); + } } + + // Grant the mapped external roles. + + // Cleanup helper relations. + for (auto it = users_per_roles.begin(); it != users_per_roles.end();) + { + const auto & role_name = it->first; + auto & user_names = it->second; + if (user_role_names.count(role_name) == 0) + { + user_names.erase(user_name); + if (user_names.empty()) + { + if (common_role_names.count(role_name) == 0) + { + auto rit = granted_role_ids.find(role_name); + if (rit != granted_role_ids.end()) + { + granted_role_names.erase(rit->second); + granted_role_ids.erase(rit); + } + } + users_per_roles.erase(it++); + } + else + { + ++it; + } + } + else + { + ++it; + } + } + + // Resolve and assign mapped external role ids. 
+ for (const auto & role_name : user_role_names) + { + users_per_roles[role_name].insert(user_name); + const auto it = granted_role_ids.find(role_name); + if (it == granted_role_ids.end()) + { + if (const auto role_id = access_control_manager->find(role_name)) + { + granted_roles.insert(*role_id); + granted_role_names.insert_or_assign(*role_id, role_name); + granted_role_ids.insert_or_assign(role_name, *role_id); + } + else + { + LOG_WARNING(getLogger(), "Unable to grant mapped role '{}' to user '{}': role not found", role_name, user_name); + } + } + else + { + const auto & role_id = it->second; + granted_roles.insert(role_id); + } + } + + external_role_hashes[user_name] = new_hash; +} + + +void LDAPAccessStorage::updateRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const +{ + // common_role_names are not included since they don't change. + const auto new_hash = boost::hash{}(external_roles); + + const auto it = external_role_hashes.find(user_name); + if (it != external_role_hashes.end() && it->second == new_hash) + return; + + auto update_func = [this, &external_roles] (const AccessEntityPtr & entity_) -> AccessEntityPtr + { + if (auto user = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user->clone()); + grantRolesNoLock(*changed_user, external_roles); + return changed_user; + } + return entity_; + }; + + memory_storage.update(id, update_func); +} + + +std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_name, const LDAPSearchResultsList & external_roles) const +{ + std::set role_names; + + if (external_roles.size() != role_search_params.size()) + throw Exception("Unable to match external roles to mapping rules", ErrorCodes::BAD_ARGUMENTS); + + std::vector re_cache; + + for (std::size_t i = 0; i < external_roles.size(); ++i) + { + const auto & external_role_set = external_roles[i]; + const auto & role_mapping_rules = role_search_params[i].role_mapping_rules; + + re_cache.clear(); + re_cache.reserve(role_mapping_rules.size()); + + for (const auto & mapping_rule : role_mapping_rules) + { + re_cache.emplace_back(mapping_rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize); + } + + for (const auto & external_role : external_role_set) + { + bool have_match = false; + for (std::size_t j = 0; j < role_mapping_rules.size(); ++j) + { + const auto & mapping_rule = role_mapping_rules[j]; + const auto & re = re_cache[j]; + std::smatch match_results; + if (std::regex_match(external_role, match_results, re)) + { + role_names.emplace(match_results.format(mapping_rule.replace)); + have_match = true; + if (!mapping_rule.continue_on_match) + break; + } + } + if (!have_match && role_search_params[i].fail_if_all_rules_mismatch) + throw Exception("None of the external role mapping rules were able to match '" + external_role + "' string, received from LDAP server '" + ldap_server + "' for user '" + user_name + "'", ErrorCodes::BAD_ARGUMENTS); + } + } + + return role_names; +} + + +bool LDAPAccessStorage::isPasswordCorrectLDAPNoLock(const User & user, const String & password, const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const +{ + return user.authentication.isCorrectPasswordLDAP(password, user.getName(), external_authenticators, &role_search_params, &search_results); } @@ -148,7 +419,7 @@ String LDAPAccessStorage::getStorageParamsJSON() const Poco::JSON::Object params_json; params_json.set("server", ldap_server); - params_json.set("roles", 
default_role_names); + params_json.set("roles", common_role_names); std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); @@ -251,17 +522,21 @@ bool LDAPAccessStorage::hasSubscriptionImpl(EntityType type) const UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const { std::scoped_lock lock(mutex); + LDAPSearchResultsList external_roles; auto id = memory_storage.find(user_name); if (id) { auto user = memory_storage.read(*id); - if (!isPasswordCorrectImpl(*user, password, external_authenticators)) + if (!isPasswordCorrectLDAPNoLock(*user, password, external_authenticators, external_roles)) throwInvalidPassword(); if (!isAddressAllowedImpl(*user, address)) throwAddressNotAllowed(address); + // Just in case external_roles are changed. This will be no-op if they are not. + updateRolesNoLock(*id, user_name, external_roles); + return *id; } else @@ -272,16 +547,13 @@ UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & passw user->authentication = Authentication(Authentication::Type::LDAP_SERVER); user->authentication.setServerName(ldap_server); - if (!isPasswordCorrectImpl(*user, password, external_authenticators)) + if (!isPasswordCorrectLDAPNoLock(*user, password, external_authenticators, external_roles)) throwInvalidPassword(); if (!isAddressAllowedImpl(*user, address)) throwAddressNotAllowed(address); - checkAllDefaultRoleNamesFoundNoLock(); - - auto & granted_roles = user->granted_roles.roles; - boost::range::copy(roles_of_interest | boost::adaptors::map_keys, std::inserter(granted_roles, granted_roles.end())); + grantRolesNoLock(*user, external_roles); return memory_storage.insert(user); } @@ -303,18 +575,13 @@ UUID LDAPAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const user->authentication = Authentication(Authentication::Type::LDAP_SERVER); user->authentication.setServerName(ldap_server); - checkAllDefaultRoleNamesFoundNoLock(); + LDAPSearchResultsList external_roles; + // TODO: mapped external roles are not available here. Implement? - auto & granted_roles = user->granted_roles.roles; - boost::range::copy(roles_of_interest | boost::adaptors::map_keys, std::inserter(granted_roles, granted_roles.end())); + grantRolesNoLock(*user, external_roles); return memory_storage.insert(user); } } -void LDAPAccessStorage::throwDefaultRoleNotFound(const String & role_name) -{ - throw Exception("One of the default roles, the role '" + role_name + "', is not found", IAccessEntity::TypeInfo::get(IAccessEntity::Type::ROLE).not_found_error_code); -} - } diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index b1b0001d9bc..eaa39dd1624 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace Poco @@ -20,6 +21,10 @@ namespace Poco namespace DB { class AccessControlManager; +struct LDAPSearchParams; +using LDAPSearchParamsList = std::vector; +using LDAPSearchResults = std::set; +using LDAPSearchResultsList = std::vector; /// Implementation of IAccessStorage which allows attaching users from a remote LDAP server. /// Currently, any user name will be treated as a name of an existing remote user, @@ -58,15 +63,22 @@ private: // IAccessStorage implementations. 
private: void setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix); void processRoleChange(const UUID & id, const AccessEntityPtr & entity); - void checkAllDefaultRoleNamesFoundNoLock() const; - [[noreturn]] static void throwDefaultRoleNotFound(const String & role_name); + void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); + void grantRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const; + void updateRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const; + std::set mapExternalRolesNoLock(const String & user_name, const LDAPSearchResultsList & external_roles) const; + bool isPasswordCorrectLDAPNoLock(const User & user, const String & password, const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const; mutable std::recursive_mutex mutex; AccessControlManager * access_control_manager = nullptr; String ldap_server; - std::set default_role_names; - std::map roles_of_interest; + LDAPSearchParamsList role_search_params; + std::set common_role_names; + mutable std::map> users_per_roles; // per-user roles: role name -> user names + mutable std::map granted_role_names; // currently granted roles: role id -> role name + mutable std::map granted_role_ids; // currently granted roles: role name -> role id + mutable std::map external_role_hashes; // user name -> LDAPSearchResultsList hash ext::scope_guard role_change_subscription; mutable MemoryAccessStorage memory_storage; }; diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index a3223902361..5bc63ebfd83 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -1,6 +1,10 @@ #include #include #include +#include + +#include +#include #include @@ -65,36 +69,38 @@ namespace } -void LDAPClient::diag(const int rc) +void LDAPClient::diag(const int rc, String text) { std::scoped_lock lock(ldap_global_mutex); if (rc != LDAP_SUCCESS) { - String text; const char * raw_err_str = ldap_err2string(rc); - - if (raw_err_str) - text = raw_err_str; + if (raw_err_str && *raw_err_str != '\0') + { + if (!text.empty()) + text += ": "; + text += raw_err_str; + } if (handle) { - String message; char * raw_message = nullptr; ldap_get_option(handle, LDAP_OPT_DIAGNOSTIC_MESSAGE, &raw_message); - if (raw_message) - { - message = raw_message; - ldap_memfree(raw_message); - raw_message = nullptr; - } + SCOPE_EXIT({ + if (raw_message) + { + ldap_memfree(raw_message); + raw_message = nullptr; + } + }); - if (!message.empty()) + if (raw_message && *raw_message != '\0') { if (!text.empty()) text += ": "; - text += message; + text += raw_message; } } @@ -250,10 +256,9 @@ void LDAPClient::openConnection() break; } + default: - { throw Exception("Unknown SASL mechanism", ErrorCodes::LDAP_ERROR); - } } } @@ -268,13 +273,164 @@ void LDAPClient::closeConnection() noexcept handle = nullptr; } -bool LDAPSimpleAuthClient::check() +LDAPSearchResults LDAPClient::search(const LDAPSearchParams & search_params) { std::scoped_lock lock(ldap_global_mutex); + LDAPSearchResults result; + + int scope = 0; + switch (search_params.scope) + { + case LDAPSearchParams::Scope::BASE: scope = LDAP_SCOPE_BASE; break; + case LDAPSearchParams::Scope::ONE_LEVEL: scope = LDAP_SCOPE_ONELEVEL; break; + case LDAPSearchParams::Scope::SUBTREE: scope = LDAP_SCOPE_SUBTREE; break; + case LDAPSearchParams::Scope::CHILDREN: scope = LDAP_SCOPE_CHILDREN; 
break; + } + + const String filter = search_params.filter_prefix + escapeForLDAP(params.user) + search_params.filter_suffix; + char * attrs[] = { const_cast(search_params.attribute.c_str()), nullptr }; + ::timeval timeout = { params.search_timeout.count(), 0 }; + LDAPMessage* msgs = nullptr; + + SCOPE_EXIT({ + if (msgs) + { + ldap_msgfree(msgs); + msgs = nullptr; + } + }); + + diag(ldap_search_ext_s(handle, search_params.base_dn.c_str(), scope, filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); + + for ( + auto * msg = ldap_first_message(handle, msgs); + msg != nullptr; + msg = ldap_next_message(handle, msg) + ) + { + switch (ldap_msgtype(msg)) + { + case LDAP_RES_SEARCH_ENTRY: + { + BerElement * ber = nullptr; + + SCOPE_EXIT({ + if (ber) + { + ber_free(ber, 0); + ber = nullptr; + } + }); + + for ( + auto * attr = ldap_first_attribute(handle, msg, &ber); + attr != nullptr; + attr = ldap_next_attribute(handle, msg, ber) + ) + { + SCOPE_EXIT({ + ldap_memfree(attr); + attr = nullptr; + }); + + if (search_params.attribute.empty() || boost::iequals(attr, search_params.attribute)) + { + auto ** vals = ldap_get_values_len(handle, msg, attr); + if (vals) + { + SCOPE_EXIT({ + ldap_value_free_len(vals); + vals = nullptr; + }); + + for (std::size_t i = 0; vals[i]; i++) + { + if (vals[i]->bv_val && vals[i]->bv_len > 0) + result.emplace(vals[i]->bv_val, vals[i]->bv_len); + } + } + } + } + + break; + } + + case LDAP_RES_SEARCH_REFERENCE: + { + char ** referrals = nullptr; + diag(ldap_parse_reference(handle, msg, &referrals, nullptr, 0)); + + if (referrals) + { + SCOPE_EXIT({ +// ldap_value_free(referrals); + ber_memvfree(reinterpret_cast(referrals)); + referrals = nullptr; + }); + + for (std::size_t i = 0; referrals[i]; i++) + { + LOG_WARNING(&Poco::Logger::get("LDAPClient"), "Received reference during LDAP search but not following it: {}", referrals[i]); + } + } + + break; + } + + case LDAP_RES_SEARCH_RESULT: + { + int rc = LDAP_SUCCESS; + char * matched_msg = nullptr; + char * error_msg = nullptr; + + diag(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0)); + + if (rc != LDAP_SUCCESS) + { + String message = "LDAP search failed"; + + const char * raw_err_str = ldap_err2string(rc); + if (raw_err_str && *raw_err_str != '\0') + { + message += ": "; + message += raw_err_str; + } + + if (error_msg && *error_msg != '\0') + { + message += ", "; + message += error_msg; + } + + if (matched_msg && *matched_msg != '\0') + { + message += ", matching DN part: "; + message += matched_msg; + } + + throw Exception(message, ErrorCodes::LDAP_ERROR); + } + + break; + } + + case -1: + throw Exception("Failed to process LDAP search message", ErrorCodes::LDAP_ERROR); + } + } + + return result; +} + +bool LDAPSimpleAuthClient::authenticate(const LDAPSearchParamsList * search_params, LDAPSearchResultsList * search_results) +{ if (params.user.empty()) throw Exception("LDAP authentication of a user with empty name is not allowed", ErrorCodes::BAD_ARGUMENTS); + if (!search_params != !search_results) + throw Exception("Cannot return LDAP search results", ErrorCodes::BAD_ARGUMENTS); + // Silently reject authentication attempt if the password is empty as if it didn't match. if (params.password.empty()) return false; @@ -284,6 +440,26 @@ bool LDAPSimpleAuthClient::check() // Will throw on any error, including invalid credentials. openConnection(); + // While connected, run search queries and save the results, if asked. 
+ if (search_params) + { + search_results->clear(); + search_results->reserve(search_params->size()); + + try + { + for (const auto & single_search_params : *search_params) + { + search_results->emplace_back(search(single_search_params)); + } + } + catch (...) + { + search_results->clear(); + throw; + } + } + return true; } @@ -303,7 +479,12 @@ void LDAPClient::closeConnection() noexcept { } -bool LDAPSimpleAuthClient::check() +LDAPSearchResults LDAPClient::search(const LDAPSearchParams & search_params) +{ + throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); +} + +bool LDAPSimpleAuthClient::authenticate() { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index 777c87c5b94..f0ace69649b 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -30,9 +30,10 @@ public: LDAPClient & operator= (LDAPClient &&) = delete; protected: - MAYBE_NORETURN void diag(const int rc); + MAYBE_NORETURN void diag(const int rc, String text = ""); MAYBE_NORETURN void openConnection(); void closeConnection() noexcept; + LDAPSearchResults search(const LDAPSearchParams & search_params); protected: const LDAPServerParams params; @@ -46,7 +47,7 @@ class LDAPSimpleAuthClient { public: using LDAPClient::LDAPClient; - bool check(); + bool authenticate(const LDAPSearchParamsList * search_params, LDAPSearchResultsList * search_results); }; } diff --git a/src/Access/LDAPParams.h b/src/Access/LDAPParams.h index eeadba6bc01..70a11155854 100644 --- a/src/Access/LDAPParams.h +++ b/src/Access/LDAPParams.h @@ -3,11 +3,46 @@ #include #include +#include +#include namespace DB { +struct LDAPRoleMappingRules +{ + String match = ".+"; + String replace = "$&"; + + bool continue_on_match = false; +}; + +struct LDAPSearchParams +{ + enum class Scope + { + BASE, + ONE_LEVEL, + SUBTREE, + CHILDREN + }; + + String base_dn; + String attribute = "cn"; + Scope scope = Scope::SUBTREE; + + String filter_prefix; + String filter_suffix; + + bool fail_if_all_rules_mismatch = false; + std::vector role_mapping_rules; +}; + +using LDAPSearchParamsList = std::vector; +using LDAPSearchResults = std::set; +using LDAPSearchResultsList = std::vector; + struct LDAPServerParams { enum class ProtocolVersion From be184272d8a84a8887e9f0d7a76c28025d1c9c33 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 20 Nov 2020 02:26:52 +0400 Subject: [PATCH 005/697] Compilation fixes --- src/Access/LDAPAccessStorage.cpp | 3 ++- src/Access/LDAPClient.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 7218c317b8c..e12098c69a6 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -187,7 +187,8 @@ void LDAPAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, { user_ids = memory_storage.findAll(); } - else { + else + { const auto it = users_per_roles.find(role_name); if (it != users_per_roles.end()) { diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 5bc63ebfd83..45879ddf480 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -465,7 +465,7 @@ bool LDAPSimpleAuthClient::authenticate(const LDAPSearchParamsList * search_para #else // USE_LDAP -void LDAPClient::diag(const int) +void LDAPClient::diag(const int, String) { throw Exception("ClickHouse was built without LDAP support", 
ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } @@ -479,12 +479,12 @@ void LDAPClient::closeConnection() noexcept { } -LDAPSearchResults LDAPClient::search(const LDAPSearchParams & search_params) +LDAPSearchResults LDAPClient::search(const LDAPSearchParams &) { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } -bool LDAPSimpleAuthClient::authenticate() +bool LDAPSimpleAuthClient::authenticate(const LDAPSearchParamsList *, LDAPSearchResultsList *) { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } From facdd225aaeb4628a6dce48592b8d2a0fe5a110c Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 20 Nov 2020 20:59:56 +0400 Subject: [PATCH 006/697] Add regex syntax checks --- src/Access/Authentication.cpp | 4 ++-- src/Access/LDAPAccessStorage.cpp | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 27a62e3a2af..8270c1465fe 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -34,13 +34,13 @@ Authentication::Digest Authentication::getPasswordDoubleSHA1() const } case SHA256_PASSWORD: - throw Exception("Cannot get password double SHA1 for user with 'SHA256_PASSWORD' authentication.", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Cannot get password double SHA1 for user with 'SHA256_PASSWORD' authentication", ErrorCodes::BAD_ARGUMENTS); case DOUBLE_SHA1_PASSWORD: return password_hash; case LDAP_SERVER: - throw Exception("Cannot get password double SHA1 for user with 'LDAP_SERVER' authentication.", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Cannot get password double SHA1 for user with 'LDAP_SERVER' authentication", ErrorCodes::BAD_ARGUMENTS); case MAX_TYPE: break; diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index e12098c69a6..02d64a9e4c9 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -50,11 +50,11 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m const bool has_role_mapping = config.has(prefix_str + "role_mapping"); if (!has_server) - throw Exception("Missing 'server' field for LDAP user directory.", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Missing 'server' field for LDAP user directory", ErrorCodes::BAD_ARGUMENTS); const auto ldap_server_cfg = config.getString(prefix_str + "server"); if (ldap_server_cfg.empty()) - throw Exception("Empty 'server' field for LDAP user directory.", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Empty 'server' field for LDAP user directory", ErrorCodes::BAD_ARGUMENTS); std::set common_roles_cfg; if (has_roles) @@ -94,7 +94,7 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m else if (scope == "subtree") rm_params.scope = LDAPSearchParams::Scope::SUBTREE; else if (scope == "children") rm_params.scope = LDAPSearchParams::Scope::CHILDREN; else - throw Exception("Invalid value of 'scope' field in '" + key + "' section of LDAP user directory, must be one of 'base', 'one_level', 'subtree', or 'children'.", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Invalid value of 'scope' field in '" + key + "' section of LDAP user directory, must be one of 'base', 'one_level', 'subtree', or 'children'", ErrorCodes::BAD_ARGUMENTS); Poco::Util::AbstractConfiguration::Keys all_mapping_keys; config.keys(rm_prefix, all_mapping_keys); @@ -109,6 +109,16 @@ void 
LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m auto & role_mapping_rule = rm_params.role_mapping_rules.back(); role_mapping_rule.match = config.getString(rule_prefix_str + "match", ".+"); + try + { + // Construct unused regex instance just to check the syntax. + std::regex(role_mapping_rule.match, std::regex_constants::ECMAScript); + } + catch (const std::regex_error & e) + { + throw Exception("ECMAScript regex syntax error in 'match' field in '" + mkey + "' rule of '" + key + "' section of LDAP user directory: " + e.what(), ErrorCodes::BAD_ARGUMENTS); + } + role_mapping_rule.replace = config.getString(rule_prefix_str + "replace", "$&"); role_mapping_rule.continue_on_match = config.getBool(rule_prefix_str + "continue_on_match", false); } @@ -371,7 +381,6 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_n re_cache.clear(); re_cache.reserve(role_mapping_rules.size()); - for (const auto & mapping_rule : role_mapping_rules) { re_cache.emplace_back(mapping_rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize); From a0a50c1eb62a0082efebb1beeecbd8a7ac68c5f1 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 20 Nov 2020 23:31:21 +0400 Subject: [PATCH 007/697] Change some config parameters to handle placeholders --- src/Access/ExternalAuthenticators.cpp | 17 ++++++++++--- src/Access/LDAPAccessStorage.cpp | 3 +-- src/Access/LDAPClient.cpp | 36 ++++++++++++++++++++++++--- src/Access/LDAPParams.h | 8 ++---- 4 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 3ed1b21c3c2..6f37bbf666e 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -27,6 +27,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str const bool has_host = config.has(ldap_server_config + ".host"); const bool has_port = config.has(ldap_server_config + ".port"); + const bool has_bind_dn = config.has(ldap_server_config + ".bind_dn"); const bool has_auth_dn_prefix = config.has(ldap_server_config + ".auth_dn_prefix"); const bool has_auth_dn_suffix = config.has(ldap_server_config + ".auth_dn_suffix"); const bool has_enable_tls = config.has(ldap_server_config + ".enable_tls"); @@ -46,11 +47,19 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str if (params.host.empty()) throw Exception("Empty 'host' entry", ErrorCodes::BAD_ARGUMENTS); - if (has_auth_dn_prefix) - params.auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); + if (has_bind_dn) + { + if (has_auth_dn_prefix || has_auth_dn_suffix) + throw Exception("Deprecated 'auth_dn_prefix' and 'auth_dn_suffix' entries cannot be used with 'bind_dn' entry", ErrorCodes::BAD_ARGUMENTS); - if (has_auth_dn_suffix) - params.auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); + params.bind_dn = config.getString(ldap_server_config + ".bind_dn"); + } + else if (has_auth_dn_prefix || has_auth_dn_suffix) + { + const auto auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); + const auto auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); + params.bind_dn = auth_dn_prefix + "{username}" + auth_dn_suffix; + } if (has_enable_tls) { diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 02d64a9e4c9..814e9d63826 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -82,9 +82,8 
@@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m auto & rm_params = role_search_params_cfg.back(); rm_params.base_dn = config.getString(rm_prefix_str + "base_dn", ""); + rm_params.search_filter = config.getString(rm_prefix_str + "search_filter", ""); rm_params.attribute = config.getString(rm_prefix_str + "attribute", "cn"); - rm_params.filter_prefix = config.getString(rm_prefix_str + "filter_prefix", ""); - rm_params.filter_suffix = config.getString(rm_prefix_str + "filter_suffix", ""); rm_params.fail_if_all_rules_mismatch = config.getBool(rm_prefix_str + "fail_if_all_rules_mismatch", true); auto scope = config.getString(rm_prefix_str + "scope", "subtree"); diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 45879ddf480..c702aa3c40f 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -7,6 +7,8 @@ #include #include +#include +#include #include @@ -67,6 +69,28 @@ namespace return dest; } + auto replacePlaceholders(const String & src, const std::vector> & pairs) + { + String dest = src; + + for (const auto & pair : pairs) + { + const auto & placeholder = pair.first; + const auto & value = pair.second; + for ( + auto pos = dest.find(placeholder); + pos != std::string::npos; + pos = dest.find(placeholder, pos) + ) + { + dest.replace(pos, placeholder.size(), value); + pos += value.size(); + } + } + + return dest; + } + } void LDAPClient::diag(const int rc, String text) @@ -246,13 +270,14 @@ void LDAPClient::openConnection() { case LDAPServerParams::SASLMechanism::SIMPLE: { - const String dn = params.auth_dn_prefix + escapeForLDAP(params.user) + params.auth_dn_suffix; + const auto escaped_username = escapeForLDAP(params.user); + const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{username}", escaped_username} }); ::berval cred; cred.bv_val = const_cast(params.password.c_str()); cred.bv_len = params.password.size(); - diag(ldap_sasl_bind_s(handle, dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr)); + diag(ldap_sasl_bind_s(handle, bind_dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr)); break; } @@ -288,7 +313,10 @@ LDAPSearchResults LDAPClient::search(const LDAPSearchParams & search_params) case LDAPSearchParams::Scope::CHILDREN: scope = LDAP_SCOPE_CHILDREN; break; } - const String filter = search_params.filter_prefix + escapeForLDAP(params.user) + search_params.filter_suffix; + const auto escaped_username = escapeForLDAP(params.user); + const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{username}", escaped_username} }); + const auto base_dn = replacePlaceholders(search_params.base_dn, { {"{username}", escaped_username}, {"{bind_dn}", bind_dn} }); + const auto search_filter = replacePlaceholders(search_params.search_filter, { {"{username}", escaped_username}, {"{bind_dn}", bind_dn}, {"{base_dn}", base_dn} }); char * attrs[] = { const_cast(search_params.attribute.c_str()), nullptr }; ::timeval timeout = { params.search_timeout.count(), 0 }; LDAPMessage* msgs = nullptr; @@ -301,7 +329,7 @@ LDAPSearchResults LDAPClient::search(const LDAPSearchParams & search_params) } }); - diag(ldap_search_ext_s(handle, search_params.base_dn.c_str(), scope, filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); + diag(ldap_search_ext_s(handle, base_dn.c_str(), scope, search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); for ( auto * msg = ldap_first_message(handle, msgs); diff --git a/src/Access/LDAPParams.h 
b/src/Access/LDAPParams.h index 70a11155854..7e03a94e76e 100644 --- a/src/Access/LDAPParams.h +++ b/src/Access/LDAPParams.h @@ -29,12 +29,10 @@ struct LDAPSearchParams }; String base_dn; + String search_filter; String attribute = "cn"; Scope scope = Scope::SUBTREE; - String filter_prefix; - String filter_suffix; - bool fail_if_all_rules_mismatch = false; std::vector role_mapping_rules; }; @@ -97,9 +95,7 @@ struct LDAPServerParams SASLMechanism sasl_mechanism = SASLMechanism::SIMPLE; - String auth_dn_prefix; - String auth_dn_suffix; - + String bind_dn; String user; String password; From c12e6ae7c5eb408d06e2a16acb980e3d51553141 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sat, 21 Nov 2020 00:49:29 +0400 Subject: [PATCH 008/697] Actualize Add role_mapping documentation (comments) --- programs/server/config.xml | 66 +++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index e17b59671af..380fcacd543 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -219,9 +219,9 @@ Parameters: host - LDAP server hostname or IP, this parameter is mandatory and cannot be empty. port - LDAP server port, default is 636 if enable_tls is set to true, 389 otherwise. - auth_dn_prefix, auth_dn_suffix - prefix and suffix used to construct the DN to bind to. - Effectively, the resulting DN will be constructed as auth_dn_prefix + escape(user_name) + auth_dn_suffix string. - Note, that this implies that auth_dn_suffix should usually have comma ',' as its first non-space character. + bind_dn - template used to construct the DN to bind to. + The resulting DN will be constructed by replacing all '{username}' substrings of the template with the actual + user name during each authentication attempt. enable_tls - flag to trigger use of secure connection to the LDAP server. Specify 'no' for plain text (ldap://) protocol (not recommended). Specify 'yes' for LDAP over SSL/TLS (ldaps://) protocol (recommended, the default). @@ -239,8 +239,7 @@ localhost 636 - uid= - ,ou=users,dc=example,dc=com + uid={username},ou=users,dc=example,dc=com yes tls1.2 demand @@ -269,9 +268,43 @@ server - one of LDAP server names defined in 'ldap_servers' config section above. This parameter is mandatory and cannot be empty. roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. - If no roles are specified, user will not be able to perform any actions after authentication. - If any of the listed roles is not defined locally at the time of authentication, the authenthication attempt - will fail as if the provided password was incorrect. + If no roles are specified here or assigned during role mapping (below), user will not be able to perform any + actions after authentication. + role_mapping - section with LDAP search parameters and mapping rules. + The list of strings (values of attributes) returned by the search will be transformed and the resulting strings + will be treated as local role names and assigned to the user. + There can be multiple 'role_mapping' sections defined inside the same 'ldap' section. All of them will be + applied. + base_dn - template used to construct the base DN for the LDAP search. + The resulting DN will be constructed by replacing all '{username}' and '{bind_dn}' substrings + of the template with the actual user name and bind DN during each LDAP search. 
+ attribute - attribute name whose values will be returned by the LDAP search. + scope - scope of the LDAP search. + Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default). + search_filter - template used to construct the search filter for the LDAP search. + The resulting filter will be constructed by replacing all '{username}', '{bind_dn}', and '{base_dn}' + substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + Note that the special characters must be escaped properly in XML. + fail_if_all_rules_mismatch - flag to trigger failure if none of the rules were able to match and transform any + of the resulting strings returned by the LDAP search. By default, set to 'true'. + rule - section with matching and mapping info. + Each string will be matched to the regex and, if there is a full match, replaced using the format string to + get the name of the local role that needs to be assigned to the user, as illustrated below. + There can be multiple 'rule' sections defined inside the same 'role_mapping' section. Each of those rules + will be applied in the order they are listed in the 'role_mapping' section. If a rule does not match a string, + the next rule will be applied. If none of the rules matched a string and 'fail_if_all_rules_mismatch' is + set to 'false', that particular string will be ignored. If a rule matched a string and 'continue_on_match' + is set to 'false', the subsequent rules will not be applied to the current string. + match - regular expression, in ECMAScript format, used to match each entire string returned by the LDAP search. If + matched successfully, a replacement will be performed and the resulting string will be treated as a local + role name and assigned to the user. By default, set to '.+' (match any non-empty string). + Note that the special characters must be escaped properly in XML. + replace - format string used as a replace expression after the match succeeded. References like '$&' (entire + matched string) or '$n' (n-th subgroup) can be used. By default, set to '$&'. + Note that the special characters must be escaped properly in XML. + continue_on_match - flag that indicates whether to continue matching and mapping using the subsequent rules + after this rule successfully matched and mapped the string. By default, set to 'false'. + If set to 'true' and multiple rules match, multiple role names may be generated from the same input string.
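+ For illustration only (hypothetical attribute value, not part of any shipped default): if the LDAP search + returns the string 'clickhouse_operators', a rule with match 'clickhouse_(.+)' and replace '$1' maps it to + the local role name 'operators'. A string that no rule matches is either silently ignored (when + 'fail_if_all_rules_mismatch' is 'false') or causes the authentication attempt to fail (when it is 'true').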
Example: my_ldap_server @@ -279,6 +312,23 @@ + + ou=groups,dc=example,dc=com + cn + subtree + (&(objectClass=groupOfNames)(member={bind_dn})) + true + + clickhouse_(.+) + $1 + true + + + .+ + $& + false + + --> From 78acf226dbb467a51868704c2a86eb0eb2d47683 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sat, 21 Nov 2020 18:08:40 +0400 Subject: [PATCH 009/697] Revert user directory name change --- src/Access/AccessControlManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 5aa9699d96f..a95d65ebb59 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -292,7 +292,7 @@ void AccessControlManager::addStoragesFromUserDirectoriesConfig( else if (type == "ldap") type = LDAPAccessStorage::STORAGE_TYPE; - String name = config.getString(prefix + ".name", key_in_user_directories); + String name = config.getString(prefix + ".name", type); if (type == MemoryAccessStorage::STORAGE_TYPE) { From fb481649ecbc5c6046e29f3bd54c842392dc418c Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sat, 21 Nov 2020 19:08:02 +0400 Subject: [PATCH 010/697] Change naming Add serialization of new params in getStorageParamsJSON() --- src/Access/LDAPAccessStorage.cpp | 71 +++++++++++++++++++++++++------- src/Access/LDAPParams.h | 2 +- 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 814e9d63826..5ef1722cb6d 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -104,22 +104,22 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m const String rule_prefix = rm_prefix_str + mkey; const String rule_prefix_str = rule_prefix + '.'; - rm_params.role_mapping_rules.emplace_back(); - auto & role_mapping_rule = rm_params.role_mapping_rules.back(); + rm_params.rules.emplace_back(); + auto & rule = rm_params.rules.back(); - role_mapping_rule.match = config.getString(rule_prefix_str + "match", ".+"); + rule.match = config.getString(rule_prefix_str + "match", ".+"); try { // Construct unused regex instance just to check the syntax. 
- std::regex(role_mapping_rule.match, std::regex_constants::ECMAScript); + std::regex(rule.match, std::regex_constants::ECMAScript); } catch (const std::regex_error & e) { throw Exception("ECMAScript regex syntax error in 'match' field in '" + mkey + "' rule of '" + key + "' section of LDAP user directory: " + e.what(), ErrorCodes::BAD_ARGUMENTS); } - role_mapping_rule.replace = config.getString(rule_prefix_str + "replace", "$&"); - role_mapping_rule.continue_on_match = config.getBool(rule_prefix_str + "continue_on_match", false); + rule.replace = config.getString(rule_prefix_str + "replace", "$&"); + rule.continue_on_match = config.getBool(rule_prefix_str + "continue_on_match", false); } } } @@ -376,28 +376,28 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_n for (std::size_t i = 0; i < external_roles.size(); ++i) { const auto & external_role_set = external_roles[i]; - const auto & role_mapping_rules = role_search_params[i].role_mapping_rules; + const auto & rules = role_search_params[i].rules; re_cache.clear(); - re_cache.reserve(role_mapping_rules.size()); - for (const auto & mapping_rule : role_mapping_rules) + re_cache.reserve(rules.size()); + for (const auto & rule : rules) { - re_cache.emplace_back(mapping_rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize); + re_cache.emplace_back(rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize); } for (const auto & external_role : external_role_set) { bool have_match = false; - for (std::size_t j = 0; j < role_mapping_rules.size(); ++j) + for (std::size_t j = 0; j < rules.size(); ++j) { - const auto & mapping_rule = role_mapping_rules[j]; + const auto & rule = rules[j]; const auto & re = re_cache[j]; std::smatch match_results; if (std::regex_match(external_role, match_results, re)) { - role_names.emplace(match_results.format(mapping_rule.replace)); + role_names.emplace(match_results.format(rule.replace)); have_match = true; - if (!mapping_rule.continue_on_match) + if (!rule.continue_on_match) break; } } @@ -428,7 +428,48 @@ String LDAPAccessStorage::getStorageParamsJSON() const Poco::JSON::Object params_json; params_json.set("server", ldap_server); - params_json.set("roles", common_role_names); + + Poco::JSON::Array common_role_names_json; + for (const auto & role : common_role_names) + { + common_role_names_json.add(role); + } + params_json.set("roles", common_role_names_json); + + Poco::JSON::Array role_mappings_json; + for (const auto & role_mapping : role_search_params) + { + Poco::JSON::Object role_mapping_json; + + role_mapping_json.set("base_dn", role_mapping.base_dn); + role_mapping_json.set("search_filter", role_mapping.search_filter); + role_mapping_json.set("attribute", role_mapping.attribute); + role_mapping_json.set("fail_if_all_rules_mismatch", role_mapping.fail_if_all_rules_mismatch); + + String scope; + switch (role_mapping.scope) + { + case LDAPSearchParams::Scope::BASE: scope = "base"; break; + case LDAPSearchParams::Scope::ONE_LEVEL: scope = "one_level"; break; + case LDAPSearchParams::Scope::SUBTREE: scope = "subtree"; break; + case LDAPSearchParams::Scope::CHILDREN: scope = "children"; break; + } + role_mapping_json.set("scope", scope); + + Poco::JSON::Array rules_json; + for (const auto & rule : role_mapping.rules) + { + Poco::JSON::Object rule_json; + rule_json.set("match", rule.match); + rule_json.set("replace", rule.replace); + rule_json.set("continue_on_match", rule.continue_on_match); + rules_json.add(rule_json); + } + 
role_mapping_json.set("rules", rules_json); + + role_mappings_json.add(role_mapping_json); + } + params_json.set("role_mappings", role_mappings_json); std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); diff --git a/src/Access/LDAPParams.h b/src/Access/LDAPParams.h index 7e03a94e76e..811d3775684 100644 --- a/src/Access/LDAPParams.h +++ b/src/Access/LDAPParams.h @@ -34,7 +34,7 @@ struct LDAPSearchParams Scope scope = Scope::SUBTREE; bool fail_if_all_rules_mismatch = false; - std::vector role_mapping_rules; + std::vector rules; }; using LDAPSearchParamsList = std::vector; From e9a3a97cb929b0cbf87faac660a27df38a6ffa54 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 22 Nov 2020 00:44:54 +0400 Subject: [PATCH 011/697] Improve regex instance creation code --- src/Access/LDAPAccessStorage.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 5ef1722cb6d..48b10a17d1e 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB @@ -371,27 +372,18 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_n if (external_roles.size() != role_search_params.size()) throw Exception("Unable to match external roles to mapping rules", ErrorCodes::BAD_ARGUMENTS); - std::vector re_cache; - + std::unordered_map re_cache; for (std::size_t i = 0; i < external_roles.size(); ++i) { const auto & external_role_set = external_roles[i]; const auto & rules = role_search_params[i].rules; - - re_cache.clear(); - re_cache.reserve(rules.size()); - for (const auto & rule : rules) - { - re_cache.emplace_back(rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize); - } - for (const auto & external_role : external_role_set) { bool have_match = false; for (std::size_t j = 0; j < rules.size(); ++j) { const auto & rule = rules[j]; - const auto & re = re_cache[j]; + const auto & re = re_cache.try_emplace(rule.match, rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize).first->second; std::smatch match_results; if (std::regex_match(external_role, match_results, re)) { @@ -401,6 +393,7 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_n break; } } + if (!have_match && role_search_params[i].fail_if_all_rules_mismatch) throw Exception("None of the external role mapping rules were able to match '" + external_role + "' string, received from LDAP server '" + ldap_server + "' for user '" + user_name + "'", ErrorCodes::BAD_ARGUMENTS); } From 03b3a93a154358557243ab4104936aeafb4ee20b Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 22 Nov 2020 11:17:01 +0400 Subject: [PATCH 012/697] Compilation fix --- src/Access/LDAPAccessStorage.cpp | 3 +-- src/Access/LDAPClient.cpp | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 48b10a17d1e..a7441d8c679 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -380,9 +380,8 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_n for (const auto & external_role : external_role_set) { bool have_match = false; - for (std::size_t j = 0; j < rules.size(); ++j) + for (const auto & rule : rules) { - const auto & rule = rules[j]; const auto & re = re_cache.try_emplace(rule.match, rule.match, std::regex_constants::ECMAScript 
| std::regex_constants::optimize).first->second; std::smatch match_results; if (std::regex_match(external_role, match_results, re)) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index c702aa3c40f..cba74fbbb89 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -110,7 +110,6 @@ void LDAPClient::diag(const int rc, String text) if (handle) { char * raw_message = nullptr; - ldap_get_option(handle, LDAP_OPT_DIAGNOSTIC_MESSAGE, &raw_message); SCOPE_EXIT({ if (raw_message) @@ -120,6 +119,8 @@ void LDAPClient::diag(const int rc, String text) } }); + ldap_get_option(handle, LDAP_OPT_DIAGNOSTIC_MESSAGE, &raw_message); + if (raw_message && *raw_message != '\0') { if (!text.empty()) From b3e2ebbaa5900f50eba8515f8cff682c3eaff2a5 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 23 Nov 2020 08:19:38 +0300 Subject: [PATCH 013/697] Used global region for accessing S3 if can't determine exactly. --- src/IO/S3/PocoHTTPClient.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 4a5b79e31ea..b8b78a38985 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -71,6 +71,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() boost::algorithm::to_lower(matched_region); region = matched_region; } + else + { + region = Aws::Region::AWS_GLOBAL; + } } } From b6c2743103d02afcf8c09ed4a8a3b5a7619e3eba Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Thu, 3 Dec 2020 22:52:35 -0500 Subject: [PATCH 014/697] Updating TestFlows tests to support changes for LDAP role mapping. --- .../ldap/authentication/regression.py | 3 + .../requirements/requirements.py | 2 +- .../external_user_directory/regression.py | 3 + .../requirements/requirements.md | 23 +- .../requirements/requirements.py | 793 +++++++++++++++++- .../tests/external_user_directory_config.py | 10 +- .../external_user_directory/tests/roles.py | 77 +- .../tests/server_config.py | 2 +- 8 files changed, 872 insertions(+), 41 deletions(-) diff --git a/tests/testflows/ldap/authentication/regression.py b/tests/testflows/ldap/authentication/regression.py index ed75ce4fe75..ff004a998ca 100755 --- a/tests/testflows/ldap/authentication/regression.py +++ b/tests/testflows/ldap/authentication/regression.py @@ -29,6 +29,9 @@ xfails = { @TestFeature @Name("authentication") @ArgumentParser(argparser) +@Specifications( + SRS_007_ClickHouse_Authentication_of_Users_via_LDAP +) @Requirements( RQ_SRS_007_LDAP_Authentication("1.0") ) diff --git a/tests/testflows/ldap/authentication/requirements/requirements.py b/tests/testflows/ldap/authentication/requirements/requirements.py index 60fbef9b8cd..b1af73ac039 100644 --- a/tests/testflows/ldap/authentication/requirements/requirements.py +++ b/tests/testflows/ldap/authentication/requirements/requirements.py @@ -1,6 +1,6 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.201026.1232822. +# document by TestFlows v1.6.201124.1002350. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. 
from testflows.core import Specification diff --git a/tests/testflows/ldap/external_user_directory/regression.py b/tests/testflows/ldap/external_user_directory/regression.py index bd404d54438..b88c1a2a6d6 100755 --- a/tests/testflows/ldap/external_user_directory/regression.py +++ b/tests/testflows/ldap/external_user_directory/regression.py @@ -29,6 +29,9 @@ xfails = { @TestFeature @Name("external user directory") @ArgumentParser(argparser) +@Specifications( + SRS_009_ClickHouse_LDAP_External_User_Directory +) @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication("1.0") ) diff --git a/tests/testflows/ldap/external_user_directory/requirements/requirements.md b/tests/testflows/ldap/external_user_directory/requirements/requirements.md index 46532c3945d..7a8e554c17a 100644 --- a/tests/testflows/ldap/external_user_directory/requirements/requirements.md +++ b/tests/testflows/ldap/external_user_directory/requirements/requirements.md @@ -51,6 +51,7 @@ * 4.2.2.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.New](#rqsrs-009ldapexternaluserdirectoryrolenew) * 4.2.2.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NewPrivilege](#rqsrs-009ldapexternaluserdirectoryrolenewprivilege) * 4.2.2.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.RemovedPrivilege](#rqsrs-009ldapexternaluserdirectoryroleremovedprivilege) + * 4.2.2.7 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added](#rqsrs-009ldapexternaluserdirectoryrolenotpresentadded) * 4.2.3 [Configuration](#configuration) * 4.2.3.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid](#rqsrs-009ldapexternaluserdirectoryconfigurationserverinvalid) * 4.2.3.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Definition](#rqsrs-009ldapexternaluserdirectoryconfigurationserverdefinition) @@ -334,12 +335,10 @@ are configured during parallel [LDAP] user logins. #### Roles ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed -version: 1.0 +version: 2.0 -[ClickHouse] SHALL reject authentication attempt if any of the roles that are specified in the configuration -of the external user directory are not defined at the time of the authentication attempt -with an exception that if a user was able to authenticate in past and its internal user object was created and cached -then the user SHALL be able to authenticate again, even if one of the roles is missing. +[ClickHouse] SHALL allow authentication even if the roles that are specified in the configuration +of the external user directory are not defined at the time of the authentication attempt. ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed.Privileges version: 1.0 @@ -377,6 +376,14 @@ version: 1.0 including cached users when privilege is removed from all the roles specified in the configuration of the external user directory. +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added +version: 1.0 + +[ClickHouse] SHALL add a role to the users authenticated using LDAP external user directory +that did not exist during the time of authentication but are defined in the +configuration file as soon as the role with that name becomes +available. + #### Configuration ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid @@ -659,10 +666,10 @@ in the `` sub-section in the `` if more than one `roles` parameter is defined in the configuration. 
##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Invalid -version: 1.0 +version: 2.0 -[ClickHouse] SHALL return an error if the role specified in the `` -parameter does not exist locally. +[ClickHouse] SHALL not return an error if the role specified in the `` +parameter does not exist locally. ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Empty version: 1.0 diff --git a/tests/testflows/ldap/external_user_directory/requirements/requirements.py b/tests/testflows/ldap/external_user_directory/requirements/requirements.py index 4c4b17d01dc..c499fc685f5 100644 --- a/tests/testflows/ldap/external_user_directory/requirements/requirements.py +++ b/tests/testflows/ldap/external_user_directory/requirements/requirements.py @@ -1,10 +1,771 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.201009.1190249. +# document by TestFlows v1.6.201124.1002350. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. +from testflows.core import Specification from testflows.core import Requirement +SRS_009_ClickHouse_LDAP_External_User_Directory = Specification( + name='SRS-009 ClickHouse LDAP External User Directory', + description=None, + author=None, + date=None, + status=None, + approved_by=None, + approved_date=None, + approved_version=None, + version=None, + group=None, + type=None, + link=None, + uid=None, + parent=None, + children=None, + content=''' +# SRS-009 ClickHouse LDAP External User Directory +# Software Requirements Specification + +## Table of Contents + +* 1 [Revision History](#revision-history) +* 2 [Introduction](#introduction) +* 3 [Terminology](#terminology) + * 3.1 [LDAP](#ldap) +* 4 [Requirements](#requirements) + * 4.1 [Generic](#generic) + * 4.1.1 [User Authentication](#user-authentication) + * 4.1.1.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication](#rqsrs-009ldapexternaluserdirectoryauthentication) + * 4.1.1.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.MultipleUserDirectories](#rqsrs-009ldapexternaluserdirectorymultipleuserdirectories) + * 4.1.1.3 [RQ.SRS-009.LDAP.ExternalUserDirectory.MultipleUserDirectories.Lookup](#rqsrs-009ldapexternaluserdirectorymultipleuserdirectorieslookup) + * 4.1.1.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Users.Authentication.NewUsers](#rqsrs-009ldapexternaluserdirectoryusersauthenticationnewusers) + * 4.1.1.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.DeletedUsers](#rqsrs-009ldapexternaluserdirectoryauthenticationdeletedusers) + * 4.1.1.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Valid](#rqsrs-009ldapexternaluserdirectoryauthenticationvalid) + * 4.1.1.7 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Invalid](#rqsrs-009ldapexternaluserdirectoryauthenticationinvalid) + * 4.1.1.8 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.UsernameChanged](#rqsrs-009ldapexternaluserdirectoryauthenticationusernamechanged) + * 4.1.1.9 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.PasswordChanged](#rqsrs-009ldapexternaluserdirectoryauthenticationpasswordchanged) + * 4.1.1.10 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.LDAPServerRestart](#rqsrs-009ldapexternaluserdirectoryauthenticationldapserverrestart) + * 4.1.1.11 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.ClickHouseServerRestart](#rqsrs-009ldapexternaluserdirectoryauthenticationclickhouseserverrestart) + * 4.1.1.12 
[RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel](#rqsrs-009ldapexternaluserdirectoryauthenticationparallel) + * 4.1.1.13 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.ValidAndInvalid](#rqsrs-009ldapexternaluserdirectoryauthenticationparallelvalidandinvalid) + * 4.1.1.14 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.MultipleServers](#rqsrs-009ldapexternaluserdirectoryauthenticationparallelmultipleservers) + * 4.1.1.15 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.LocalOnly](#rqsrs-009ldapexternaluserdirectoryauthenticationparallellocalonly) + * 4.1.1.16 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.LocalAndMultipleLDAP](#rqsrs-009ldapexternaluserdirectoryauthenticationparallellocalandmultipleldap) + * 4.1.1.17 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.SameUser](#rqsrs-009ldapexternaluserdirectoryauthenticationparallelsameuser) + * 4.1.1.18 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.DynamicallyAddedAndRemovedUsers](#rqsrs-009ldapexternaluserdirectoryauthenticationparalleldynamicallyaddedandremovedusers) + * 4.1.2 [Connection](#connection) + * 4.1.2.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.PlainText](#rqsrs-009ldapexternaluserdirectoryconnectionprotocolplaintext) + * 4.1.2.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS](#rqsrs-009ldapexternaluserdirectoryconnectionprotocoltls) + * 4.1.2.3 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.StartTLS](#rqsrs-009ldapexternaluserdirectoryconnectionprotocolstarttls) + * 4.1.2.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS.Certificate.Validation](#rqsrs-009ldapexternaluserdirectoryconnectionprotocoltlscertificatevalidation) + * 4.1.2.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS.Certificate.SelfSigned](#rqsrs-009ldapexternaluserdirectoryconnectionprotocoltlscertificateselfsigned) + * 4.1.2.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS.Certificate.SpecificCertificationAuthority](#rqsrs-009ldapexternaluserdirectoryconnectionprotocoltlscertificatespecificcertificationauthority) + * 4.1.2.7 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.Mechanism.Anonymous](#rqsrs-009ldapexternaluserdirectoryconnectionauthenticationmechanismanonymous) + * 4.1.2.8 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.Mechanism.Unauthenticated](#rqsrs-009ldapexternaluserdirectoryconnectionauthenticationmechanismunauthenticated) + * 4.1.2.9 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.Mechanism.NamePassword](#rqsrs-009ldapexternaluserdirectoryconnectionauthenticationmechanismnamepassword) + * 4.1.2.10 [RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.UnreachableServer](#rqsrs-009ldapexternaluserdirectoryconnectionauthenticationunreachableserver) + * 4.2 [Specific](#specific) + * 4.2.1 [User Discovery](#user-discovery) + * 4.2.1.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Users.Lookup.Priority](#rqsrs-009ldapexternaluserdirectoryuserslookuppriority) + * 4.2.1.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.Restart.Server](#rqsrs-009ldapexternaluserdirectoryrestartserver) + * 4.2.1.3 [RQ.SRS-009.LDAP.ExternalUserDirectory.Restart.Server.ParallelLogins](#rqsrs-009ldapexternaluserdirectoryrestartserverparallellogins) + * 4.2.2 [Roles](#roles) + * 4.2.2.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed](#rqsrs-009ldapexternaluserdirectoryroleremoved) + * 4.2.2.2 
[RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed.Privileges](#rqsrs-009ldapexternaluserdirectoryroleremovedprivileges) + * 4.2.2.3 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Readded.Privileges](#rqsrs-009ldapexternaluserdirectoryrolereaddedprivileges) + * 4.2.2.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.New](#rqsrs-009ldapexternaluserdirectoryrolenew) + * 4.2.2.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NewPrivilege](#rqsrs-009ldapexternaluserdirectoryrolenewprivilege) + * 4.2.2.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.RemovedPrivilege](#rqsrs-009ldapexternaluserdirectoryroleremovedprivilege) + * 4.2.2.7 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added](#rqsrs-009ldapexternaluserdirectoryrolenotpresentadded) + * 4.2.3 [Configuration](#configuration) + * 4.2.3.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid](#rqsrs-009ldapexternaluserdirectoryconfigurationserverinvalid) + * 4.2.3.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Definition](#rqsrs-009ldapexternaluserdirectoryconfigurationserverdefinition) + * 4.2.3.3 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Name](#rqsrs-009ldapexternaluserdirectoryconfigurationservername) + * 4.2.3.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Host](#rqsrs-009ldapexternaluserdirectoryconfigurationserverhost) + * 4.2.3.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Port](#rqsrs-009ldapexternaluserdirectoryconfigurationserverport) + * 4.2.3.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Port.Default](#rqsrs-009ldapexternaluserdirectoryconfigurationserverportdefault) + * 4.2.3.7 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.AuthDN.Prefix](#rqsrs-009ldapexternaluserdirectoryconfigurationserverauthdnprefix) + * 4.2.3.8 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.AuthDN.Suffix](#rqsrs-009ldapexternaluserdirectoryconfigurationserverauthdnsuffix) + * 4.2.3.9 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.AuthDN.Value](#rqsrs-009ldapexternaluserdirectoryconfigurationserverauthdnvalue) + * 4.2.3.10 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS](#rqsrs-009ldapexternaluserdirectoryconfigurationserverenabletls) + * 4.2.3.11 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.Default](#rqsrs-009ldapexternaluserdirectoryconfigurationserverenabletlsoptionsdefault) + * 4.2.3.12 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.No](#rqsrs-009ldapexternaluserdirectoryconfigurationserverenabletlsoptionsno) + * 4.2.3.13 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.Yes](#rqsrs-009ldapexternaluserdirectoryconfigurationserverenabletlsoptionsyes) + * 4.2.3.14 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.StartTLS](#rqsrs-009ldapexternaluserdirectoryconfigurationserverenabletlsoptionsstarttls) + * 4.2.3.15 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSMinimumProtocolVersion](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsminimumprotocolversion) + * 4.2.3.16 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSMinimumProtocolVersion.Values](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsminimumprotocolversionvalues) + * 4.2.3.17 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSMinimumProtocolVersion.Default](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsminimumprotocolversiondefault) + * 4.2.3.18 
[RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsrequirecert) + * 4.2.3.19 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Default](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsrequirecertoptionsdefault) + * 4.2.3.20 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Demand](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsrequirecertoptionsdemand) + * 4.2.3.21 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Allow](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsrequirecertoptionsallow) + * 4.2.3.22 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Try](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsrequirecertoptionstry) + * 4.2.3.23 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Never](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsrequirecertoptionsnever) + * 4.2.3.24 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCertFile](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlscertfile) + * 4.2.3.25 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSKeyFile](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlskeyfile) + * 4.2.3.26 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCACertDir](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlscacertdir) + * 4.2.3.27 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCACertFile](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlscacertfile) + * 4.2.3.28 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCipherSuite](#rqsrs-009ldapexternaluserdirectoryconfigurationservertlsciphersuite) + * 4.2.3.29 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Syntax](#rqsrs-009ldapexternaluserdirectoryconfigurationserversyntax) + * 4.2.3.30 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.LDAPUserDirectory](#rqsrs-009ldapexternaluserdirectoryconfigurationusersldapuserdirectory) + * 4.2.3.31 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.LDAPUserDirectory.MoreThanOne](#rqsrs-009ldapexternaluserdirectoryconfigurationusersldapuserdirectorymorethanone) + * 4.2.3.32 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Syntax](#rqsrs-009ldapexternaluserdirectoryconfigurationuserssyntax) + * 4.2.3.33 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersserver) + * 4.2.3.34 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.Empty](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersserverempty) + * 4.2.3.35 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.Missing](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersservermissing) + * 4.2.3.36 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.MoreThanOne](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersservermorethanone) + * 4.2.3.37 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.Invalid](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersserverinvalid) + * 4.2.3.38 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersroles) + * 4.2.3.39 
[RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.MoreThanOne](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersrolesmorethanone)
+      * 4.2.3.40 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Invalid](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersrolesinvalid)
+      * 4.2.3.41 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Empty](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersrolesempty)
+      * 4.2.3.42 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Missing](#rqsrs-009ldapexternaluserdirectoryconfigurationusersparametersrolesmissing)
+    * 4.2.4 [Authentication](#authentication)
+      * 4.2.4.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Username.Empty](#rqsrs-009ldapexternaluserdirectoryauthenticationusernameempty)
+      * 4.2.4.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Username.Long](#rqsrs-009ldapexternaluserdirectoryauthenticationusernamelong)
+      * 4.2.4.3 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Username.UTF8](#rqsrs-009ldapexternaluserdirectoryauthenticationusernameutf8)
+      * 4.2.4.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Password.Empty](#rqsrs-009ldapexternaluserdirectoryauthenticationpasswordempty)
+      * 4.2.4.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Password.Long](#rqsrs-009ldapexternaluserdirectoryauthenticationpasswordlong)
+      * 4.2.4.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Password.UTF8](#rqsrs-009ldapexternaluserdirectoryauthenticationpasswordutf8)
+* 5 [References](#references)
+
+## Revision History
+
+This document is stored in an electronic form using [Git] source control management software
+hosted in a [GitHub Repository].
+All the updates are tracked using the [Revision History].
+
+## Introduction
+
+The [QA-SRS007 ClickHouse Authentication of Users via LDAP] enables support for authenticating
+users using an [LDAP] server. This requirements specification adds additional functionality
+for integrating [LDAP] with [ClickHouse].
+
+This document will cover requirements to allow authentication of users stored in an
+external user directory using an [LDAP] server without having to explicitly define users in [ClickHouse]'s
+`users.xml` configuration file.
+
+## Terminology
+
+### LDAP
+
+* Lightweight Directory Access Protocol
+
+## Requirements
+
+### Generic
+
+#### User Authentication
+
+##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication
+version: 1.0
+
+[ClickHouse] SHALL support authenticating users that are defined only on the [LDAP] server.
+
+##### RQ.SRS-009.LDAP.ExternalUserDirectory.MultipleUserDirectories
+version: 1.0
+
+[ClickHouse] SHALL support authenticating users using multiple [LDAP] external user directories.
+
+##### RQ.SRS-009.LDAP.ExternalUserDirectory.MultipleUserDirectories.Lookup
+version: 1.0
+
+[ClickHouse] SHALL attempt to authenticate external [LDAP] user
+using [LDAP] external user directory in the same order
+in which user directories are specified in the `config.xml` file.
+If a user cannot be authenticated using the first [LDAP] external user directory
+then the next user directory in the list SHALL be used.
+
+##### RQ.SRS-009.LDAP.ExternalUserDirectory.Users.Authentication.NewUsers
+version: 1.0
+
+[ClickHouse] SHALL support authenticating users that are defined only on the [LDAP] server
+as soon as they are added to the [LDAP] server.
+ +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.DeletedUsers +version: 1.0 + +[ClickHouse] SHALL not allow authentication of users that +were previously defined only on the [LDAP] server but were removed +from the [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Valid +version: 1.0 + +[ClickHouse] SHALL only allow user authentication using [LDAP] server if and only if +user name and password match [LDAP] server records for the user +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if either user name or password +do not match [LDAP] server records for the user +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.UsernameChanged +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if the username is changed +on the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.PasswordChanged +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if the password +for the user is changed on the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.LDAPServerRestart +version: 1.0 + +[ClickHouse] SHALL support authenticating users after [LDAP] server is restarted +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.ClickHouseServerRestart +version: 1.0 + +[ClickHouse] SHALL support authenticating users after server is restarted +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users using [LDAP] server +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.ValidAndInvalid +version: 1.0 + +[ClickHouse] SHALL support authentication of valid users and +prohibit authentication of invalid users using [LDAP] server +in parallel without having invalid attempts affecting valid authentications +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.MultipleServers +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of external [LDAP] users +authenticated using multiple [LDAP] external user directories. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.LocalOnly +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users defined only locally +when one or more [LDAP] external user directories are specified in the configuration file. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.LocalAndMultipleLDAP +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of local and external [LDAP] users +authenticated using multiple [LDAP] external user directories. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.SameUser +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of the same external [LDAP] user +authenticated using the same [LDAP] external user directory. 
+ +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Parallel.DynamicallyAddedAndRemovedUsers +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users using +[LDAP] external user directory when [LDAP] users are dynamically added and +removed. + +#### Connection + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.PlainText +version: 1.0 + +[ClickHouse] SHALL support user authentication using plain text `ldap://` non secure protocol +while connecting to the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS +version: 1.0 + +[ClickHouse] SHALL support user authentication using `SSL/TLS` `ldaps://` secure protocol +while connecting to the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.StartTLS +version: 1.0 + +[ClickHouse] SHALL support user authentication using legacy `StartTLS` protocol which is a +plain text `ldap://` protocol that is upgraded to [TLS] when connecting to the [LDAP] server +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS.Certificate.Validation +version: 1.0 + +[ClickHouse] SHALL support certificate validation used for [TLS] connections +to the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS.Certificate.SelfSigned +version: 1.0 + +[ClickHouse] SHALL support self-signed certificates for [TLS] connections +to the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Protocol.TLS.Certificate.SpecificCertificationAuthority +version: 1.0 + +[ClickHouse] SHALL support certificates signed by specific Certification Authority for [TLS] connections +to the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.Mechanism.Anonymous +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication using [Anonymous Authentication Mechanism of Simple Bind] +authentication mechanism when connecting to the [LDAP] server when using [LDAP] external server directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.Mechanism.Unauthenticated +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication using [Unauthenticated Authentication Mechanism of Simple Bind] +authentication mechanism when connecting to the [LDAP] server when using [LDAP] external server directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.Mechanism.NamePassword +version: 1.0 + +[ClickHouse] SHALL allow authentication using only [Name/Password Authentication Mechanism of Simple Bind] +authentication mechanism when connecting to the [LDAP] server when using [LDAP] external server directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Connection.Authentication.UnreachableServer +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit user login if [LDAP] server is unreachable +when using [LDAP] external user directory. + +### Specific + +#### User Discovery + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Users.Lookup.Priority +version: 2.0 + +[ClickHouse] SHALL lookup user presence in the same order +as user directories are defined in the `config.xml`. 
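+
+For illustration only, a hypothetical `config.xml` fragment with two [LDAP] external user directories
+(the server names `ldap_server_one` and `ldap_server_two` are assumed examples); user lookup follows
+the order in which the entries appear, so `ldap_server_one` is consulted before `ldap_server_two`:
+
+```xml
+<user_directories>
+    <ldap>
+        <server>ldap_server_one</server>
+    </ldap>
+    <ldap>
+        <server>ldap_server_two</server>
+    </ldap>
+</user_directories>
+```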
+ +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Restart.Server +version: 1.0 + +[ClickHouse] SHALL support restarting server when one or more LDAP external directories +are configured. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Restart.Server.ParallelLogins +version: 1.0 + +[ClickHouse] SHALL support restarting server when one or more LDAP external directories +are configured during parallel [LDAP] user logins. + +#### Roles + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed +version: 2.0 + +[ClickHouse] SHALL allow authentication even if the roles that are specified in the configuration +of the external user directory are not defined at the time of the authentication attempt. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed.Privileges +version: 1.0 + +[ClickHouse] SHALL remove the privileges provided by the role from all the LDAP +users authenticated using external user directory if it is removed +including currently cached users that are still able to authenticated where the removed +role is specified in the configuration of the external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Readded.Privileges +version: 1.0 + +[ClickHouse] SHALL reassign the role and add the privileges provided by the role +when it is re-added after removal for all LDAP users authenticated using external user directory +including any cached users where the re-added role was specified in the configuration of the external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.New +version: 1.0 + +[ClickHouse] SHALL not allow any new roles to be assigned to any LDAP +users authenticated using external user directory unless the role is specified +in the configuration of the external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NewPrivilege +version: 1.0 + +[ClickHouse] SHALL add new privilege to all the LDAP users authenticated using external user directory +including cached users when new privilege is added to one of the roles specified +in the configuration of the external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.RemovedPrivilege +version: 1.0 + +[ClickHouse] SHALL remove privilege from all the LDAP users authenticated using external user directory +including cached users when privilege is removed from all the roles specified +in the configuration of the external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added +version: 1.0 + +[ClickHouse] SHALL add a role to the users authenticated using LDAP external user directory +that did not exist during the time of authentication but are defined in the +configuration file as soon as the role with that name becomes +available. + +#### Configuration + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit user login if [LDAP] server configuration is not valid. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Definition +version: 1.0 + +[ClickHouse] SHALL support using the [LDAP] servers defined in the +`ldap_servers` section of the `config.xml` as the server to be used +for a external user directory that uses an [LDAP] server as a source of user definitions. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Name +version: 1.0 + +[ClickHouse] SHALL not support empty string as a server name. 
+ +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Host +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify [LDAP] +server hostname or IP, this parameter SHALL be mandatory and SHALL not be empty. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Port +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify [LDAP] server port. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Port.Default +version: 1.0 + +[ClickHouse] SHALL use default port number `636` if `enable_tls` is set to `yes` or `389` otherwise. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.AuthDN.Prefix +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify the prefix +of value used to construct the DN to bound to during authentication via [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.AuthDN.Suffix +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify the suffix +of value used to construct the DN to bound to during authentication via [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.AuthDN.Value +version: 1.0 + +[ClickHouse] SHALL construct DN as `auth_dn_prefix + escape(user_name) + auth_dn_suffix` string. + +> This implies that auth_dn_suffix should usually have comma ',' as its first non-space character. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS +version: 1.0 + +[ClickHouse] SHALL support `` parameter to trigger the use of secure connection to the [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.Default +version: 1.0 + +[ClickHouse] SHALL use `yes` value as the default for `` parameter +to enable SSL/TLS `ldaps://` protocol. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.No +version: 1.0 + +[ClickHouse] SHALL support specifying `no` as the value of `` parameter to enable +plain text `ldap://` protocol. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.Yes +version: 1.0 + +[ClickHouse] SHALL support specifying `yes` as the value of `` parameter to enable +SSL/TLS `ldaps://` protocol. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.EnableTLS.Options.StartTLS +version: 1.0 + +[ClickHouse] SHALL support specifying `starttls` as the value of `` parameter to enable +legacy `StartTLS` protocol that used plain text `ldap://` protocol, upgraded to [TLS]. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSMinimumProtocolVersion +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify +the minimum protocol version of SSL/TLS. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSMinimumProtocolVersion.Values +version: 1.0 + +[ClickHouse] SHALL support specifying `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, and `tls1.2` +as a value of the `` parameter. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSMinimumProtocolVersion.Default +version: 1.0 + +[ClickHouse] SHALL set `tls1.2` as the default value of the `` parameter. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify [TLS] peer +certificate verification behavior. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Default +version: 1.0 + +[ClickHouse] SHALL use `demand` value as the default for the `` parameter. 
+ +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Demand +version: 1.0 + +[ClickHouse] SHALL support specifying `demand` as the value of `` parameter to +enable requesting of client certificate. If no certificate is provided, or a bad certificate is +provided, the session SHALL be immediately terminated. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Allow +version: 1.0 + +[ClickHouse] SHALL support specifying `allow` as the value of `` parameter to +enable requesting of client certificate. If no +certificate is provided, the session SHALL proceed normally. +If a bad certificate is provided, it SHALL be ignored and the session SHALL proceed normally. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Try +version: 1.0 + +[ClickHouse] SHALL support specifying `try` as the value of `` parameter to +enable requesting of client certificate. If no certificate is provided, the session +SHALL proceed normally. If a bad certificate is provided, the session SHALL be +immediately terminated. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSRequireCert.Options.Never +version: 1.0 + +[ClickHouse] SHALL support specifying `never` as the value of `` parameter to +disable requesting of client certificate. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCertFile +version: 1.0 + +[ClickHouse] SHALL support `` to specify the path to certificate file used by +[ClickHouse] to establish connection with the [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSKeyFile +version: 1.0 + +[ClickHouse] SHALL support `` to specify the path to key file for the certificate +specified by the `` parameter. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCACertDir +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify to a path to +the directory containing [CA] certificates used to verify certificates provided by the [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCACertFile +version: 1.0 + +[ClickHouse] SHALL support `` parameter to specify a path to a specific +[CA] certificate file used to verify certificates provided by the [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.TLSCipherSuite +version: 1.0 + +[ClickHouse] SHALL support `tls_cipher_suite` parameter to specify allowed cipher suites. +The value SHALL use the same format as the `ciphersuites` in the [OpenSSL Ciphers]. + +For example, + +```xml +ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 +``` + +The available suites SHALL depend on the [OpenSSL] library version and variant used to build +[ClickHouse] and therefore might change. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following example syntax to create an entry for an [LDAP] server inside the `config.xml` +configuration file or of any configuration file inside the `config.d` directory. 
+ +```xml + + + localhost + 636 + cn= + , ou=users, dc=example, dc=com + yes + tls1.2 + demand + /path/to/tls_cert_file + /path/to/tls_key_file + /path/to/tls_ca_cert_file + /path/to/tls_ca_cert_dir + ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 + + +``` + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.LDAPUserDirectory +version: 1.0 + +[ClickHouse] SHALL support `` sub-section in the `` section of the `config.xml` +that SHALL define a external user directory that uses an [LDAP] server as a source of user definitions. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.LDAPUserDirectory.MoreThanOne +version: 2.0 + +[ClickHouse] SHALL support more than one `` sub-sections in the `` section of the `config.xml` +that SHALL allow to define more than one external user directory that use an [LDAP] server as a source +of user definitions. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Syntax +version: 1.0 + +[ClickHouse] SHALL support `` section with the following syntax + +```xml + + + + my_ldap_server + + + + + + + +``` + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server +version: 1.0 + +[ClickHouse] SHALL support `server` parameter in the `` sub-section in the `` +section of the `config.xml` that SHALL specify one of LDAP server names +defined in `` section. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.Empty +version: 1.0 + +[ClickHouse] SHALL return an error if the `server` parameter in the `` sub-section in the `` +is empty. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.Missing +version: 1.0 + +[ClickHouse] SHALL return an error if the `server` parameter in the `` sub-section in the `` +is missing. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.MoreThanOne +version: 1.0 + +[ClickHouse] SHALL only use the first definitition of the `server` parameter in the `` sub-section in the `` +if more than one `server` parameter is defined in the configuration. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Server.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error if the server specified as the value of the `` +parameter is not defined. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles +version: 1.0 + +[ClickHouse] SHALL support `roles` parameter in the `` sub-section in the `` +section of the `config.xml` that SHALL specify the names of a locally defined roles that SHALL +be assigned to all users retrieved from the [LDAP] server. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.MoreThanOne +version: 1.0 + +[ClickHouse] SHALL only use the first definitition of the `roles` parameter +in the `` sub-section in the `` +if more than one `roles` parameter is defined in the configuration. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Invalid +version: 2.0 + +[ClickHouse] SHALL not return an error if the role specified in the `` +parameter does not exist locally. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Empty +version: 1.0 + +[ClickHouse] SHALL not allow users authenticated using LDAP external user directory +to perform any action if the `roles` parameter in the `` sub-section in the `` +section is empty. 
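+
+As an illustration of the `roles` parameter (the role name `my_local_role` is an assumed example), an
+`ldap` user directory entry that assigns one locally defined role to all users authenticated through
+the [LDAP] external user directory:
+
+```xml
+<user_directories>
+    <ldap>
+        <server>my_ldap_server</server>
+        <roles>
+            <my_local_role />
+        </roles>
+    </ldap>
+</user_directories>
+```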
+ +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Missing +version: 1.0 + +[ClickHouse] SHALL not allow users authenticated using LDAP external user directory +to perform any action if the `roles` parameter in the `` sub-section in the `` +section is missing. + +#### Authentication + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Username.Empty +version: 1.0 + +[ClickHouse] SHALL not support authenticating users with empty username +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Username.Long +version: 1.0 + +[ClickHouse] SHALL support authenticating users with a long username of at least 256 bytes +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Username.UTF8 +version: 1.0 + +[ClickHouse] SHALL support authentication users with a username that contains [UTF-8] characters +when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Password.Empty +version: 1.0 + +[ClickHouse] SHALL not support authenticating users with empty passwords +even if an empty password is valid for the user and +is allowed by the [LDAP] server when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Password.Long +version: 1.0 + +[ClickHouse] SHALL support long password of at least 256 bytes +that can be used to authenticate users when using [LDAP] external user directory. + +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication.Password.UTF8 +version: 1.0 + +[ClickHouse] SHALL support [UTF-8] characters in passwords +used to authenticate users when using [LDAP] external user directory. + +## References + +* **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/ +* **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +* **ClickHouse:** https://clickhouse.tech + +[SRS]: #srs +[Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/ +[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md +[LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +[ClickHouse]: https://clickhouse.tech +[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md +[Git]: https://git-scm.com/ +[GitHub]: https://github.com +''') + RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Authentication', version='1.0', @@ -449,16 +1210,14 @@ RQ_SRS_009_LDAP_ExternalUserDirectory_Restart_Server_ParallelLogins = Requiremen RQ_SRS_009_LDAP_ExternalUserDirectory_Role_Removed = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed', - version='1.0', + version='2.0', priority=None, group=None, type=None, uid=None, description=( - '[ClickHouse] SHALL reject authentication attempt if any of the roles that are specified in the configuration\n' - 'of the external user directory are not defined at the time of the authentication attempt\n' - 'with an exception that if a user was able to authenticate in past and its internal user object was created 
and cached\n' - 'then the user SHALL be able to authenticate again, even if one of the roles is missing.\n' + '[ClickHouse] SHALL allow authentication even if the roles that are specified in the configuration\n' + 'of the external user directory are not defined at the time of the authentication attempt.\n' '\n' ), link=None) @@ -539,6 +1298,22 @@ RQ_SRS_009_LDAP_ExternalUserDirectory_Role_RemovedPrivilege = Requirement( ), link=None) +RQ_SRS_009_LDAP_ExternalUserDirectory_Role_NotPresent_Added = Requirement( + name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL add a role to the users authenticated using LDAP external user directory\n' + 'that did not exist during the time of authentication but are defined in the \n' + 'configuration file as soon as the role with that name becomes\n' + 'available.\n' + '\n' + ), + link=None) + RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid', version='1.0', @@ -1132,14 +1907,14 @@ RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Users_Parameters_Roles_MoreT RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Users_Parameters_Roles_Invalid = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Invalid', - version='1.0', + version='2.0', priority=None, group=None, type=None, uid=None, description=( - '[ClickHouse] SHALL return an error if the role specified in the ``\n' - 'parameter does not exist locally.\n' + '[ClickHouse] SHALL not return an error if the role specified in the ``\n' + 'parameter does not exist locally. \n' '\n' ), link=None) diff --git a/tests/testflows/ldap/external_user_directory/tests/external_user_directory_config.py b/tests/testflows/ldap/external_user_directory/tests/external_user_directory_config.py index b5677eba4b2..9ff480426bf 100644 --- a/tests/testflows/ldap/external_user_directory/tests/external_user_directory_config.py +++ b/tests/testflows/ldap/external_user_directory/tests/external_user_directory_config.py @@ -220,15 +220,12 @@ def defined_twice_roles(self, timeout=20): @TestScenario @Requirements( - RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Users_Parameters_Roles_Invalid("1.0") + RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Users_Parameters_Roles_Invalid("2.0") ) def invalid_role_in_roles(self, timeout=20): - """Check that an error is returned when LDAP users try to authenticate + """Check that no error is returned when LDAP users try to authenticate if an invalid role is specified inside the `roles` section. 
""" - exitcode = 4 - message = "DB::Exception: user1: Authentication failed" - servers = { "openldap1": { "host": "openldap1", "port": "389", "enable_tls": "no", @@ -241,8 +238,7 @@ def invalid_role_in_roles(self, timeout=20): with ldap_external_user_directory("openldap1", roles=["foo"], restart=True): with When(f"I login as {user['username']} and execute query"): current().context.node.query("SELECT 1", - settings=[("user", user["username"]), ("password", user["password"])], - exitcode=exitcode, message=message) + settings=[("user", user["username"]), ("password", user["password"])]) @TestScenario @Requirements( diff --git a/tests/testflows/ldap/external_user_directory/tests/roles.py b/tests/testflows/ldap/external_user_directory/tests/roles.py index 8a6c6f465d1..2b68c1da36b 100644 --- a/tests/testflows/ldap/external_user_directory/tests/roles.py +++ b/tests/testflows/ldap/external_user_directory/tests/roles.py @@ -139,17 +139,15 @@ def remove_privilege(self, server, timeout=20): @TestScenario @Requirements( - RQ_SRS_009_LDAP_ExternalUserDirectory_Role_Removed("1.0") + RQ_SRS_009_LDAP_ExternalUserDirectory_Role_Removed("2.0") ) def remove_role(self, server, timeout=20): """Check that when a role used in the external user directory configuration - is dynamically removed then any non-cached LDAP users should not be authenticated using + is dynamically removed then any LDAP users should still be authenticated using LDAP external user directory. """ node = self.context.node uid = getuid() - exitcode = 4 - message = "DB::Exception: {user}: Authentication failed: password is incorrect or there is no user with such name" self.context.ldap_node = self.context.cluster.node(server) @@ -174,8 +172,7 @@ def remove_role(self, server, timeout=20): with And(f"I try to login again using non-cached LDAP user"): node.query(f"SELECT 1", - settings=[("user", users[1]["username"]), ("password", users[1]["password"])], - exitcode=exitcode, message=message.format(user=users[1]["username"])) + settings=[("user", users[1]["username"]), ("password", users[1]["password"])]) @TestScenario @Requirements( @@ -228,7 +225,7 @@ def readd_privilege_by_readding_role(self, server, timeout=20): """Check that when the role used in the external user directory configuration is dynamically removed then all the privileges are removed from any LDAP users authenticated using external user directory but when the role is re-added - then privileges are restored and non-cached users can login again. + then privileges are restored. 
""" node = self.context.node uid = getuid() @@ -265,13 +262,9 @@ def readd_privilege_by_readding_role(self, server, timeout=20): settings=[("user", users[0]["username"]), ("password", users[0]["password"])], exitcode=exitcode, message=message.format(user=users[0]["username"])) - message = "DB::Exception: {user}: Authentication failed: password is incorrect or there is no user with such name" - exitcode = 4 - - with And(f"I try to login using non-cached LDAP user and expect it to fail"): + with And(f"I try to login using non-cached LDAP user and expect it to succeed"): node.query(f"SELECT 1", - settings=[("user", users[1]["username"]), ("password", users[1]["password"])], - exitcode=exitcode, message=message.format(user=users[1]["username"])) + settings=[("user", users[1]["username"]), ("password", users[1]["password"])]) with When("I re-add the role"): node.query(f"CREATE ROLE {roles[0]}") @@ -284,11 +277,65 @@ def readd_privilege_by_readding_role(self, server, timeout=20): node.query(f"SELECT * FROM {table_name} LIMIT 1", settings=[("user", users[0]["username"]), ("password", users[0]["password"])]) - with And("I try to login using non-cached LDAP expect it to work " - "with user also having privilege provided by the role"): + with And("I try to login using non-cached LDAP expect it to also work again and expect" + "for the user also to have privilege provided by the role"): node.query(f"SELECT * FROM {table_name} LIMIT 1", settings=[("user", users[1]["username"]), ("password", users[1]["password"])]) +@TestScenario +@Requirements( + RQ_SRS_009_LDAP_ExternalUserDirectory_Role_NotPresent_Added("1.0") +) +def not_present_role_added(self, server, timeout=20): + """Check that when the role used in the external user directory configuration + which was not present during LDAP user authentication + is dynamically added then all the privileges granted by the role + are given to all users authenticated using external LDAP user directory. 
+ """ + node = self.context.node + uid = getuid() + + self.context.ldap_node = self.context.cluster.node(server) + + users = [ + {"username": f"user0_{uid}", "password": "user0_password"}, + {"username": f"user1_{uid}", "password": "user1_password"} + ] + + roles = [f"role0_{uid}", f"role1_{uid}"] + + with table(f"table_{getuid()}", "CREATE TABLE {name} (d DATE, s String, i UInt8) ENGINE = Memory()") as table_name: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + with ldap_users(*[{"cn": user["username"], "userpassword": user["password"]} for user in users]): + with When(f"I login using clickhouse-client"): + with self.context.cluster.shell(node=node.name) as shell: + with shell(f"TERM=dumb clickhouse client --user {users[0]['username']} --password {users[0]['password']} | tee", + asynchronous=True, name="client") as client: + client.app.expect("clickhouse1 :\) ") + + with When("I execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get not enough privileges error"): + client.app.expect("Not enough privileges") + client.app.expect("clickhouse1 :\) ") + + try: + with Given("I add the role and grant the select privilege to it for the table"): + node.query(f"CREATE ROLE {roles[0]}") + node.query(f"GRANT SELECT ON {table_name} TO {roles[0]}") + + with When("I re-execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get no errors"): + client.app.expect("Ok\.") + client.app.expect("clickhouse1 :\) ") + + finally: + with Finally("I delete the role"): + node.query(f"DROP ROLE IF EXISTS {roles[0]}") + @TestFeature @Name("roles") @Requirements( diff --git a/tests/testflows/ldap/external_user_directory/tests/server_config.py b/tests/testflows/ldap/external_user_directory/tests/server_config.py index 5df343b53df..8fbf4c0f7f2 100644 --- a/tests/testflows/ldap/external_user_directory/tests/server_config.py +++ b/tests/testflows/ldap/external_user_directory/tests/server_config.py @@ -149,7 +149,7 @@ def invalid_enable_tls_value(self, timeout=60): servers = {"openldap1": {"host": "openldap1", "port": "389", "enable_tls": "foo", "auth_dn_prefix": "cn=", "auth_dn_suffix": ",ou=users,dc=company,dc=com" }} - invalid_server_config(servers, message=message, tail=17, timeout=timeout) + invalid_server_config(servers, message=message, tail=18, timeout=timeout) @TestScenario @Requirements( From 53db7e564c67c0e7841a0cd2cf6f29ee86e9dc3c Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 17 Dec 2020 18:27:30 +0400 Subject: [PATCH 015/697] Do transformations based on prefix only --- programs/server/config.xml | 35 ++-------------- src/Access/LDAPAccessStorage.cpp | 72 ++++++-------------------------- src/Access/LDAPAccessStorage.h | 2 +- src/Access/LDAPParams.h | 12 +----- 4 files changed, 18 insertions(+), 103 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index e88e4a5bf3c..acf101ff8b0 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -316,26 +316,9 @@ The resulting filter will be constructed by replacing all '{username}', '{bind_dn}', and '{base_dn}' substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. Note, that the special characters must be escaped properly in XML. - fail_if_all_rules_mismatch - flag to trigger failure if none of the rules were able to match and transform any - of the resulting strings returned by the LDAP search. 
By default, set to 'true'. - rule - section with matching and mapping info. - Each string will be matched to the regex and, if there is a full match, replaced using format string to - get the name of the local role that needs to be assigned to the user. - There can be multiple 'rule' sections defined inside the same 'role_mapping' section. Each of those rules - will be applied in the order they are listed in 'role_mapping' section. If a rule does not match a string - the next rule will be applied. If none of the rules matched a string and 'fail_if_all_rules_mismatch' is - set to 'false', that particular string will be ignored. If a rule matched a string and 'continue_on_match' - is set to 'false', the subsequent rules will not be applied to the current string. - match - regular expression, in ECMAScript format, used to match each entire string retured by LDAP serach. If - matched successfully, a replacement will be performed and the resulting string will be treated as local - role name and assigned to the user. By default, set to '.+' (match any non-empty string). - Note, that the special characters must be escaped properly in XML. - replace - format string used as a replace expression after the match succeeded. References like '$&' (entire - matched string), or '$n' (n-th subgroup) can be used. By default, set to '$&'. - Note, that the special characters must be escaped properly in XML. - continue_on_match - flag that indicates, whether to continue matching and mapping using the subsequent rules - after this rule successfully matched and mapped the string. By default, set to 'false'. - If set to 'true' and multiple rules match, multiple role names may be generated from one same input string. + prefix - prefix, that will be expected to be in front of each string in the original list of strings returned by + the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated + as local role names. Empty, by default. 
Example: my_ldap_server @@ -348,17 +331,7 @@ cn subtree (&(objectClass=groupOfNames)(member={bind_dn})) - true - - clickhouse_(.+) - $1 - true - - - .+ - $& - false - + clickhouse_ --> diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index a7441d8c679..27f12b5c6a0 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -85,7 +84,7 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m rm_params.base_dn = config.getString(rm_prefix_str + "base_dn", ""); rm_params.search_filter = config.getString(rm_prefix_str + "search_filter", ""); rm_params.attribute = config.getString(rm_prefix_str + "attribute", "cn"); - rm_params.fail_if_all_rules_mismatch = config.getBool(rm_prefix_str + "fail_if_all_rules_mismatch", true); + rm_params.prefix = config.getString(rm_prefix_str + "prefix", ""); auto scope = config.getString(rm_prefix_str + "scope", "subtree"); boost::algorithm::to_lower(scope); @@ -95,33 +94,6 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m else if (scope == "children") rm_params.scope = LDAPSearchParams::Scope::CHILDREN; else throw Exception("Invalid value of 'scope' field in '" + key + "' section of LDAP user directory, must be one of 'base', 'one_level', 'subtree', or 'children'", ErrorCodes::BAD_ARGUMENTS); - - Poco::Util::AbstractConfiguration::Keys all_mapping_keys; - config.keys(rm_prefix, all_mapping_keys); - for (const auto & mkey : all_mapping_keys) - { - if (mkey != "rule" && mkey.find("rule[") != 0) - continue; - - const String rule_prefix = rm_prefix_str + mkey; - const String rule_prefix_str = rule_prefix + '.'; - rm_params.rules.emplace_back(); - auto & rule = rm_params.rules.back(); - - rule.match = config.getString(rule_prefix_str + "match", ".+"); - try - { - // Construct unused regex instance just to check the syntax. - std::regex(rule.match, std::regex_constants::ECMAScript); - } - catch (const std::regex_error & e) - { - throw Exception("ECMAScript regex syntax error in 'match' field in '" + mkey + "' rule of '" + key + "' section of LDAP user directory: " + e.what(), ErrorCodes::BAD_ARGUMENTS); - } - - rule.replace = config.getString(rule_prefix_str + "replace", "$&"); - rule.continue_on_match = config.getBool(rule_prefix_str + "continue_on_match", false); - } } } @@ -255,7 +227,7 @@ void LDAPAccessStorage::grantRolesNoLock(User & user, const LDAPSearchResultsLis auto & granted_roles = user.granted_roles.roles; // Map external role names to local role names. 
- const auto user_role_names = mapExternalRolesNoLock(user_name, external_roles); + const auto user_role_names = mapExternalRolesNoLock(external_roles); external_role_hashes.erase(user_name); granted_roles.clear(); @@ -365,36 +337,27 @@ void LDAPAccessStorage::updateRolesNoLock(const UUID & id, const String & user_n } -std::set LDAPAccessStorage::mapExternalRolesNoLock(const String & user_name, const LDAPSearchResultsList & external_roles) const +std::set LDAPAccessStorage::mapExternalRolesNoLock(const LDAPSearchResultsList & external_roles) const { std::set role_names; if (external_roles.size() != role_search_params.size()) - throw Exception("Unable to match external roles to mapping rules", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Unable to map external roles", ErrorCodes::BAD_ARGUMENTS); - std::unordered_map re_cache; for (std::size_t i = 0; i < external_roles.size(); ++i) { const auto & external_role_set = external_roles[i]; - const auto & rules = role_search_params[i].rules; + const auto & prefix = role_search_params[i].prefix; + for (const auto & external_role : external_role_set) { - bool have_match = false; - for (const auto & rule : rules) + if ( + prefix.size() < external_role.size() && + external_role.compare(0, prefix.size(), prefix) == 0 + ) { - const auto & re = re_cache.try_emplace(rule.match, rule.match, std::regex_constants::ECMAScript | std::regex_constants::optimize).first->second; - std::smatch match_results; - if (std::regex_match(external_role, match_results, re)) - { - role_names.emplace(match_results.format(rule.replace)); - have_match = true; - if (!rule.continue_on_match) - break; - } + role_names.emplace(external_role.substr(prefix.size())); } - - if (!have_match && role_search_params[i].fail_if_all_rules_mismatch) - throw Exception("None of the external role mapping rules were able to match '" + external_role + "' string, received from LDAP server '" + ldap_server + "' for user '" + user_name + "'", ErrorCodes::BAD_ARGUMENTS); } } @@ -436,7 +399,7 @@ String LDAPAccessStorage::getStorageParamsJSON() const role_mapping_json.set("base_dn", role_mapping.base_dn); role_mapping_json.set("search_filter", role_mapping.search_filter); role_mapping_json.set("attribute", role_mapping.attribute); - role_mapping_json.set("fail_if_all_rules_mismatch", role_mapping.fail_if_all_rules_mismatch); + role_mapping_json.set("prefix", role_mapping.prefix); String scope; switch (role_mapping.scope) @@ -448,17 +411,6 @@ String LDAPAccessStorage::getStorageParamsJSON() const } role_mapping_json.set("scope", scope); - Poco::JSON::Array rules_json; - for (const auto & rule : role_mapping.rules) - { - Poco::JSON::Object rule_json; - rule_json.set("match", rule.match); - rule_json.set("replace", rule.replace); - rule_json.set("continue_on_match", rule.continue_on_match); - rules_json.add(rule_json); - } - role_mapping_json.set("rules", rules_json); - role_mappings_json.add(role_mapping_json); } params_json.set("role_mappings", role_mappings_json); diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index eaa39dd1624..8ec7325f9c2 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -67,7 +67,7 @@ private: void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); void grantRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const; void updateRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const; - std::set 
mapExternalRolesNoLock(const String & user_name, const LDAPSearchResultsList & external_roles) const; + std::set mapExternalRolesNoLock(const LDAPSearchResultsList & external_roles) const; bool isPasswordCorrectLDAPNoLock(const User & user, const String & password, const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const; mutable std::recursive_mutex mutex; diff --git a/src/Access/LDAPParams.h b/src/Access/LDAPParams.h index 811d3775684..be5d1d66542 100644 --- a/src/Access/LDAPParams.h +++ b/src/Access/LDAPParams.h @@ -10,14 +10,6 @@ namespace DB { -struct LDAPRoleMappingRules -{ - String match = ".+"; - String replace = "$&"; - - bool continue_on_match = false; -}; - struct LDAPSearchParams { enum class Scope @@ -32,9 +24,7 @@ struct LDAPSearchParams String search_filter; String attribute = "cn"; Scope scope = Scope::SUBTREE; - - bool fail_if_all_rules_mismatch = false; - std::vector rules; + String prefix; }; using LDAPSearchParamsList = std::vector; From c4b85f2dcdcdb751c37abecbd99b167ba53eda20 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 17 Dec 2020 18:48:12 +0400 Subject: [PATCH 016/697] Simplify the code --- src/Access/LDAPAccessStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 27f12b5c6a0..e86a82c1ac2 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -356,7 +356,7 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const LDAPSearchResul external_role.compare(0, prefix.size(), prefix) == 0 ) { - role_names.emplace(external_role.substr(prefix.size())); + role_names.emplace(external_role, prefix.size()); } } } From ded199ce2768246467a001abff74ae2b3b547d95 Mon Sep 17 00:00:00 2001 From: Daria Mozhaeva Date: Wed, 23 Dec 2020 18:32:35 +0300 Subject: [PATCH 017/697] Edit and translate to Russia --- .../integrations/embedded-rocksdb.md | 2 +- docs/en/operations/settings/settings.md | 2 +- .../integrations/embedded-rocksdb.md | 45 +++++++++++++++++++ docs/ru/operations/settings/settings.md | 25 +++++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 857e148277c..79e0e040377 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -40,6 +40,6 @@ PRIMARY KEY key ## Description {#description} -- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key. +- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a rocksdb key. - columns other than the primary key will be serialized in binary as rocksdb value in corresponding order. - queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index fc921f2ef7e..1ff2ea77fd0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -445,7 +445,7 @@ Possible values: - `'simple'` - Simple output format. - Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. 
Calculation is performed according to the data type's time zone (if present) or server time zone. + Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. The calculation is performed according to the data type's time zone (if present) or server time zone. - `'iso'` - ISO output format. diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..e160eb2bdf5 --- /dev/null +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,45 @@ +--- +toc_priority: 6 +toc_title: EmbeddedRocksDB +--- + +# EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine} + +Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/). + +`EmbeddedRocksDB` дает возможность: + +## Создавать таблицу {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name) +``` + +Обязательные параметры: + +- `primary_key_name` – любое имя столбца в списке столбцов. + +Пример: + +``` sql +CREATE TABLE test +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32, +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key +``` + +## Описание {#description} + +- должен быть указан `primary key`, он поддерживает только один столбец в первичном ключе. Первичный ключ будет сериализован в двоичном формате как ключ rocksdb. +- столбцы, отличные от первичного ключа, будут сериализованы в двоичном формате как значение rockdb в соответствующем порядке. +- запросы с фильтрацией по ключу `equals` или `in` будут оптимизированы для поиска по нескольким ключам из rocksdb. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 0a8094231c2..82051a9f999 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -421,6 +421,31 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; - [Тип данных DateTime.](../../sql-reference/data-types/datetime.md) - [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md) +## date_time_output_format {#settings-date_time_output_format} + +Позволяет выбрать разные выходные форматы текстового представления даты и времени. + +Возможные значения: + +- `'simple'` - простой выходной формат. + + Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `'2019-08-20 10:18:56'`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера. + +- `'iso'` - выходной формат ISO. + + Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `'2019-08-20T10:18:56Z'`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC). + +- `'unix_timestamp'` - выходной формат Unix. + + Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `'1566285536'`. + +Значение по умолчанию: `'simple'`. + +См. 
также: + +- [Тип данных DateTime.](../../sql-reference/data-types/datetime.md) +- [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md) + ## join_default_strictness {#settings-join_default_strictness} Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join). From 5c9fe8ff7e6c826bfbcb7fbb42a757ab33728afe Mon Sep 17 00:00:00 2001 From: Daria Mozhaeva Date: Wed, 23 Dec 2020 18:35:32 +0300 Subject: [PATCH 018/697] Edit and translate to Russian. --- docs/ru/sql-reference/data-types/datetime.md | 3 ++- docs/ru/sql-reference/data-types/datetime64.md | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 87c5da68f35..74cec551c3f 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,7 +27,7 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format). 
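
Небольшой пример (набросок), показывающий, как настройка `date_time_output_format` влияет на отображение значения `DateTime`; ожидаемый вывод соответствует описанию настройки [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format):

``` sql
SELECT toDateTime('2019-08-20 10:18:56', 'UTC') AS dt
SETTINGS date_time_output_format = 'iso';

-- ожидаемый вывод согласно описанию настройки: 2019-08-20T10:18:56Z
```
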
@@ -120,6 +120,7 @@ FROM dt - [Функции для работы с датой и временем](../../sql-reference/data-types/datetime.md) - [Функции для работы с массивами](../../sql-reference/data-types/datetime.md) - [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/settings.md#settings-date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../sql-reference/data-types/datetime.md#server_configuration_parameters-timezone) - [Операторы для работы с датой и временем](../../sql-reference/data-types/datetime.md#operators-datetime) - [Тип данных `Date`](date.md) diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index 0a602e44636..275783f0097 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -96,6 +96,7 @@ FROM dt - [Функции для работы с датой и временем](../../sql-reference/data-types/datetime64.md) - [Функции для работы с массивами](../../sql-reference/data-types/datetime64.md) - [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/settings.md#settings-date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../sql-reference/data-types/datetime64.md#server_configuration_parameters-timezone) - [Операторы для работы с датой и временем](../../sql-reference/data-types/datetime64.md#operators-datetime) - [Тип данных `Date`](date.md) From d53919d8320788cda04e61d7ea37cad24219110f Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Wed, 23 Dec 2020 14:46:47 -0500 Subject: [PATCH 019/697] Updating ldap/external_user_directory/requirements/requirements.py --- .../requirements/requirements.py | 57 +++++++++++++------ 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/tests/testflows/ldap/external_user_directory/requirements/requirements.py b/tests/testflows/ldap/external_user_directory/requirements/requirements.py index 3b77685188e..7ee45ac485e 100644 --- a/tests/testflows/ldap/external_user_directory/requirements/requirements.py +++ b/tests/testflows/ldap/external_user_directory/requirements/requirements.py @@ -514,16 +514,14 @@ RQ_SRS_009_LDAP_ExternalUserDirectory_Restart_Server_ParallelLogins = Requiremen RQ_SRS_009_LDAP_ExternalUserDirectory_Role_Removed = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed', - version='1.0', + version='2.0', priority=None, group=None, type=None, uid=None, description=( - '[ClickHouse] SHALL reject authentication attempt if any of the roles that are specified in the configuration\n' - 'of the external user directory are not defined at the time of the authentication attempt\n' - 'with an exception that if a user was able to authenticate in past and its internal user object was created and cached\n' - 'then the user SHALL be able to authenticate again, even if one of the roles is missing.\n' + '[ClickHouse] SHALL allow authentication even if the roles that are specified in the configuration\n' + 'of the external user directory are not defined at the time of the authentication attempt.\n' '\n' ), link=None, @@ -616,6 +614,24 @@ RQ_SRS_009_LDAP_ExternalUserDirectory_Role_RemovedPrivilege = Requirement( level=4, num='4.2.2.6') +RQ_SRS_009_LDAP_ExternalUserDirectory_Role_NotPresent_Added = Requirement( + 
name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL add a role to the users authenticated using LDAP external user directory\n' + 'that did not exist during the time of authentication but are defined in the \n' + 'configuration file as soon as the role with that name becomes\n' + 'available.\n' + '\n' + ), + link=None, + level=4, + num='4.2.2.7') + RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid', version='1.0', @@ -1287,14 +1303,14 @@ RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Users_Parameters_Roles_MoreT RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Users_Parameters_Roles_Invalid = Requirement( name='RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Invalid', - version='1.0', + version='2.0', priority=None, group=None, type=None, uid=None, description=( - '[ClickHouse] SHALL return an error if the role specified in the ``\n' - 'parameter does not exist locally.\n' + '[ClickHouse] SHALL not return an error if the role specified in the ``\n' + 'parameter does not exist locally. \n' '\n' ), link=None, @@ -1497,6 +1513,7 @@ SRS_009_ClickHouse_LDAP_External_User_Directory = Specification( Heading(name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.New', level=4, num='4.2.2.4'), Heading(name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NewPrivilege', level=4, num='4.2.2.5'), Heading(name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.RemovedPrivilege', level=4, num='4.2.2.6'), + Heading(name='RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added', level=4, num='4.2.2.7'), Heading(name='Configuration', level=3, num='4.2.3'), Heading(name='RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid', level=4, num='4.2.3.1'), Heading(name='RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Definition', level=4, num='4.2.3.2'), @@ -1587,6 +1604,7 @@ SRS_009_ClickHouse_LDAP_External_User_Directory = Specification( RQ_SRS_009_LDAP_ExternalUserDirectory_Role_New, RQ_SRS_009_LDAP_ExternalUserDirectory_Role_NewPrivilege, RQ_SRS_009_LDAP_ExternalUserDirectory_Role_RemovedPrivilege, + RQ_SRS_009_LDAP_ExternalUserDirectory_Role_NotPresent_Added, RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid, RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Definition, RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Name, @@ -1690,6 +1708,7 @@ SRS_009_ClickHouse_LDAP_External_User_Directory = Specification( * 4.2.2.4 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.New](#rqsrs-009ldapexternaluserdirectoryrolenew) * 4.2.2.5 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NewPrivilege](#rqsrs-009ldapexternaluserdirectoryrolenewprivilege) * 4.2.2.6 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.RemovedPrivilege](#rqsrs-009ldapexternaluserdirectoryroleremovedprivilege) + * 4.2.2.7 [RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added](#rqsrs-009ldapexternaluserdirectoryrolenotpresentadded) * 4.2.3 [Configuration](#configuration) * 4.2.3.1 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid](#rqsrs-009ldapexternaluserdirectoryconfigurationserverinvalid) * 4.2.3.2 [RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Definition](#rqsrs-009ldapexternaluserdirectoryconfigurationserverdefinition) @@ -1973,12 +1992,10 @@ are configured during parallel [LDAP] user logins. 
#### Roles ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed -version: 1.0 +version: 2.0 -[ClickHouse] SHALL reject authentication attempt if any of the roles that are specified in the configuration -of the external user directory are not defined at the time of the authentication attempt -with an exception that if a user was able to authenticate in past and its internal user object was created and cached -then the user SHALL be able to authenticate again, even if one of the roles is missing. +[ClickHouse] SHALL allow authentication even if the roles that are specified in the configuration +of the external user directory are not defined at the time of the authentication attempt. ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.Removed.Privileges version: 1.0 @@ -2016,6 +2033,14 @@ version: 1.0 including cached users when privilege is removed from all the roles specified in the configuration of the external user directory. +##### RQ.SRS-009.LDAP.ExternalUserDirectory.Role.NotPresent.Added +version: 1.0 + +[ClickHouse] SHALL add a role to the users authenticated using LDAP external user directory +that did not exist during the time of authentication but are defined in the +configuration file as soon as the role with that name becomes +available. + #### Configuration ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Server.Invalid @@ -2298,10 +2323,10 @@ in the `` sub-section in the `` if more than one `roles` parameter is defined in the configuration. ##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Invalid -version: 1.0 +version: 2.0 -[ClickHouse] SHALL return an error if the role specified in the `` -parameter does not exist locally. +[ClickHouse] SHALL not return an error if the role specified in the `` +parameter does not exist locally. 
##### RQ.SRS-009.LDAP.ExternalUserDirectory.Configuration.Users.Parameters.Roles.Empty version: 1.0 From 49631a39ae843426a87bd94baa2398b125838e3a Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Fri, 25 Dec 2020 15:05:23 +0300 Subject: [PATCH 020/697] Update docs/ru/engines/table-engines/integrations/embedded-rocksdb.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md index e160eb2bdf5..2074021121a 100644 --- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -9,7 +9,7 @@ toc_title: EmbeddedRocksDB `EmbeddedRocksDB` дает возможность: -## Создавать таблицу {#table_engine-EmbeddedRocksDB-creating-a-table} +## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] From e00b0117410d7e024889e82f825757bf769b8a18 Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Fri, 25 Dec 2020 15:05:52 +0300 Subject: [PATCH 021/697] Update docs/ru/operations/settings/settings.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 82051a9f999..2f940758e09 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -443,7 +443,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; См. также: -- [Тип данных DateTime.](../../sql-reference/data-types/datetime.md) +- [Тип данных DateTime](../../sql-reference/data-types/datetime.md) - [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md) ## join_default_strictness {#settings-join_default_strictness} From 5bc3d563d56bc837c28d177af7eb5066e4a24970 Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Fri, 25 Dec 2020 15:06:54 +0300 Subject: [PATCH 022/697] Update docs/ru/operations/settings/settings.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2f940758e09..b48ca668aa4 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -444,7 +444,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; См. 
также: - [Тип данных DateTime](../../sql-reference/data-types/datetime.md) -- [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md) +- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) ## join_default_strictness {#settings-join_default_strictness} From bc3e8f77f67ec0bd76533abefd0f4707185e82d6 Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Fri, 25 Dec 2020 15:07:20 +0300 Subject: [PATCH 023/697] Update docs/ru/engines/table-engines/integrations/embedded-rocksdb.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md index 2074021121a..e57b83070dc 100644 --- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Обязательные параметры: -- `primary_key_name` – любое имя столбца в списке столбцов. +- `primary_key_name` – любое имя столбца из списка столбцов. Пример: From 8088b17ae25a76ae10ea74ac01aaba172500b38a Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Fri, 25 Dec 2020 15:08:22 +0300 Subject: [PATCH 024/697] Update docs/ru/engines/table-engines/integrations/embedded-rocksdb.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md index e57b83070dc..3fd1b1e8d89 100644 --- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -17,7 +17,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... 
-) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name) +) ENGINE = EmbeddedRocksDB +PRIMARY KEY(primary_key_name); ``` Обязательные параметры: From 47e8783f5be5a133ab133a18b90ced056aa00b4c Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Fri, 25 Dec 2020 15:08:35 +0300 Subject: [PATCH 025/697] Update docs/ru/engines/table-engines/integrations/embedded-rocksdb.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md index 3fd1b1e8d89..575fc279b74 100644 --- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -36,7 +36,7 @@ CREATE TABLE test `v3` Float32, ) ENGINE = EmbeddedRocksDB -PRIMARY KEY key +PRIMARY KEY key; ``` ## Описание {#description} From b60c00ba7477ff4db5a9c9b7c962332c5248a4ce Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 25 Dec 2020 17:52:46 +0300 Subject: [PATCH 026/697] refactoring of TTL stream --- src/DataStreams/ITTLAlgorithm.cpp | 61 +++ src/DataStreams/ITTLAlgorithm.h | 43 ++ src/DataStreams/TTLAggregationAlgorithm.cpp | 173 +++++++ src/DataStreams/TTLAggregationAlgorithm.h | 40 ++ src/DataStreams/TTLBlockInputStream.cpp | 441 +++--------------- src/DataStreams/TTLBlockInputStream.h | 61 +-- src/DataStreams/TTLColumnAlgorithm.cpp | 88 ++++ src/DataStreams/TTLColumnAlgorithm.h | 29 ++ src/DataStreams/TTLDeleteAlgorithm.cpp | 58 +++ src/DataStreams/TTLDeleteAlgorithm.h | 21 + src/DataStreams/TTLUpdateInfoAlgorithm.cpp | 47 ++ src/DataStreams/TTLUpdateInfoAlgorithm.h | 31 ++ src/DataStreams/ya.make | 5 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 + .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 87 ++-- .../MergeTree/MergeTreeDataPartTTLInfo.h | 5 +- .../MergeTree/MergeTreeDataWriter.cpp | 3 + src/Storages/StorageInMemoryMetadata.cpp | 12 +- src/Storages/StorageInMemoryMetadata.h | 4 + src/Storages/System/StorageSystemParts.cpp | 5 + src/Storages/TTLDescription.cpp | 8 +- src/Storages/TTLDescription.h | 2 + 22 files changed, 759 insertions(+), 471 deletions(-) create mode 100644 src/DataStreams/ITTLAlgorithm.cpp create mode 100644 src/DataStreams/ITTLAlgorithm.h create mode 100644 src/DataStreams/TTLAggregationAlgorithm.cpp create mode 100644 src/DataStreams/TTLAggregationAlgorithm.h create mode 100644 src/DataStreams/TTLColumnAlgorithm.cpp create mode 100644 src/DataStreams/TTLColumnAlgorithm.h create mode 100644 src/DataStreams/TTLDeleteAlgorithm.cpp create mode 100644 src/DataStreams/TTLDeleteAlgorithm.h create mode 100644 src/DataStreams/TTLUpdateInfoAlgorithm.cpp create mode 100644 src/DataStreams/TTLUpdateInfoAlgorithm.h diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/DataStreams/ITTLAlgorithm.cpp new file mode 100644 index 00000000000..f0e98e9ab1c --- /dev/null +++ b/src/DataStreams/ITTLAlgorithm.cpp @@ -0,0 +1,61 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ITTLAlgorithm::ITTLAlgorithm( + const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : description(description_) + , old_ttl_info(old_ttl_info_) + , current_time(current_time_) + , force(force_) + , date_lut(DateLUT::instance()) +{ +} + +bool 
ITTLAlgorithm::isTTLExpired(time_t ttl) const +{ + return (ttl && (ttl <= current_time)); +} + +ColumnPtr ITTLAlgorithm::extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column) +{ + if (!expression) + return nullptr; + + if (block.has(result_column)) + return block.getByName(result_column).column; + + Block block_copy; + for (const auto & column_name : expression->getRequiredColumns()) + block_copy.insert(block.getByName(column_name)); + + expression->execute(block_copy); + return block_copy.getByName(result_column).column; +} + +UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const +{ + if (const ColumnUInt16 * column_date = typeid_cast(column)) + return date_lut.fromDayNum(DayNum(column_date->getData()[index])); + else if (const ColumnUInt32 * column_date_time = typeid_cast(column)) + return column_date_time->getData()[index]; + else if (const ColumnConst * column_const = typeid_cast(column)) + { + if (typeid_cast(&column_const->getDataColumn())) + return date_lut.fromDayNum(DayNum(column_const->getValue())); + else if (typeid_cast(&column_const->getDataColumn())) + return column_const->getValue(); + } + + throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); +} + +} diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/DataStreams/ITTLAlgorithm.h new file mode 100644 index 00000000000..28a371e9289 --- /dev/null +++ b/src/DataStreams/ITTLAlgorithm.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class ITTLAlgorithm +{ +public: + using TTLInfo = IMergeTreeDataPart::TTLInfo; + using MutableDataPartPtr = MergeTreeMutableDataPartPtr; + + ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + virtual ~ITTLAlgorithm() = default; + + virtual void execute(Block & block) = 0; + virtual void finalize(const MutableDataPartPtr & data_part) const = 0; + + bool isMinTTLExpired() const { return force || isTTLExpired(old_ttl_info.min); } + bool isMaxTTLExpired() const { return isTTLExpired(old_ttl_info.max); } + +protected: + bool isTTLExpired(time_t ttl) const; + UInt32 getTimestampByIndex(const IColumn * column, size_t index) const; + static ColumnPtr extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column); + + const TTLDescription description; + const TTLInfo old_ttl_info; + const time_t current_time; + const bool force; + TTLInfo new_ttl_info; + +private: + const DateLUTImpl & date_lut; +}; + +using TTLAlgorithmPtr = std::unique_ptr; + +} diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/DataStreams/TTLAggregationAlgorithm.cpp new file mode 100644 index 00000000000..6cc1ac00b7e --- /dev/null +++ b/src/DataStreams/TTLAggregationAlgorithm.cpp @@ -0,0 +1,173 @@ +#include + +namespace DB +{ + +TTLAggregationAlgorithm::TTLAggregationAlgorithm( + const TTLDescription & description_, + const TTLInfo & old_ttl_info_, + time_t current_time_, + bool force_, + const Block & header_, + const MergeTreeData & storage_) + : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + , header(header_) +{ + current_key_value.resize(description.group_by_keys.size()); + + ColumnNumbers keys; + for (const auto & key : description.group_by_keys) + keys.push_back(header.getPositionByName(key)); + + key_columns.resize(description.group_by_keys.size()); + AggregateDescriptions aggregates = 
description.aggregate_descriptions; + + for (auto & descr : aggregates) + if (descr.arguments.empty()) + for (const auto & name : descr.argument_names) + descr.arguments.push_back(header.getPositionByName(name)); + + columns_for_aggregator.resize(description.aggregate_descriptions.size()); + const Settings & settings = storage_.global_context.getSettingsRef(); + + Aggregator::Params params(header, keys, aggregates, + false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0, + settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, + storage_.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); + + aggregator = std::make_unique(params); +} + +void TTLAggregationAlgorithm::execute(Block & block) +{ + if (!block) + { + if (!aggregation_result.empty()) + { + MutableColumns result_columns = header.cloneEmptyColumns(); + finalizeAggregates(result_columns); + block = header.cloneWithColumns(std::move(result_columns)); + } + + return; + } + + const auto & column_names = header.getNames(); + MutableColumns result_columns = header.cloneEmptyColumns(); + MutableColumns aggregate_columns = header.cloneEmptyColumns(); + + auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); + auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column); + + size_t rows_aggregated = 0; + size_t current_key_start = 0; + size_t rows_with_current_key = 0; + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); + bool where_filter_passed = !where_column || where_column->getBool(i); + bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; + + bool same_as_current = true; + for (size_t j = 0; j < description.group_by_keys.size(); ++j) + { + const String & key_column = description.group_by_keys[j]; + const IColumn * values_column = block.getByName(key_column).column.get(); + if (!same_as_current || (*values_column)[i] != current_key_value[j]) + { + values_column->get(i, current_key_value[j]); + same_as_current = false; + } + } + + if (!same_as_current) + { + if (rows_with_current_key) + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + finalizeAggregates(result_columns); + + current_key_start = rows_aggregated; + rows_with_current_key = 0; + } + + if (ttl_expired) + { + ++rows_with_current_key; + ++rows_aggregated; + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = aggregate_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } + else + { + new_ttl_info.update(cur_ttl); + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = result_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } + } + + if (rows_with_current_key) + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + + block = header.cloneWithColumns(std::move(result_columns)); +} + +void TTLAggregationAlgorithm::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length) +{ + Columns aggregate_chunk; + aggregate_chunk.reserve(aggregate_columns.size()); + for (const auto & name : header.getNames()) + { + const auto & column = 
aggregate_columns[header.getPositionByName(name)]; + ColumnPtr chunk_column = column->cut(start_pos, length); + aggregate_chunk.emplace_back(std::move(chunk_column)); + } + + aggregator->executeOnBlock(aggregate_chunk, length, aggregation_result, key_columns, + columns_for_aggregator, no_more_keys); +} + +void TTLAggregationAlgorithm::finalizeAggregates(MutableColumns & result_columns) +{ + if (!aggregation_result.empty()) + { + auto aggregated_res = aggregator->convertToBlocks(aggregation_result, true, 1); + for (auto & agg_block : aggregated_res) + { + for (const auto & it : description.set_parts) + it.expression->execute(agg_block); + + for (const auto & name : description.group_by_keys) + { + const IColumn * values_column = agg_block.getByName(name).column.get(); + auto & result_column = result_columns[header.getPositionByName(name)]; + result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); + } + + for (const auto & it : description.set_parts) + { + const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get(); + auto & result_column = result_columns[header.getPositionByName(it.column_name)]; + result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); + } + } + } + + aggregation_result.invalidate(); +} + +void TTLAggregationAlgorithm::finalize(const MutableDataPartPtr & data_part) const +{ + data_part->ttl_infos.group_by_ttl[description.result_column] = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); +} + +} diff --git a/src/DataStreams/TTLAggregationAlgorithm.h b/src/DataStreams/TTLAggregationAlgorithm.h new file mode 100644 index 00000000000..977e755ca8b --- /dev/null +++ b/src/DataStreams/TTLAggregationAlgorithm.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class TTLAggregationAlgorithm final : public ITTLAlgorithm +{ +public: + TTLAggregationAlgorithm( + const TTLDescription & description_, + const TTLInfo & old_ttl_info_, + time_t current_time_, + bool force_, + const Block & header_, + const MergeTreeData & storage_); + + void execute(Block & block) override; + void finalize(const MutableDataPartPtr & data_part) const override; + +private: + // Calculate aggregates of aggregate_columns into aggregation_result + void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length); + + /// Finalize aggregation_result into result_columns + void finalizeAggregates(MutableColumns & result_columns); + + const Block header; + std::unique_ptr aggregator; + Row current_key_value; + AggregatedDataVariants aggregation_result; + ColumnRawPtrs key_columns; + Aggregator::AggregateColumns columns_for_aggregator; + bool no_more_keys = false; +}; + +} diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index ab80f69d4d2..8c680f6875b 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -8,15 +8,14 @@ #include #include +#include +#include +#include +#include + namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - TTLBlockInputStream::TTLBlockInputStream( const BlockInputStreamPtr & input_, const MergeTreeData & storage_, @@ -24,83 +23,62 @@ TTLBlockInputStream::TTLBlockInputStream( const MergeTreeData::MutableDataPartPtr & data_part_, time_t current_time_, bool force_) - : storage(storage_) - , metadata_snapshot(metadata_snapshot_) - , data_part(data_part_) - , current_time(current_time_) - , 
force(force_) - , old_ttl_infos(data_part->ttl_infos) - , log(&Poco::Logger::get(storage.getLogName() + " (TTLBlockInputStream)")) - , date_lut(DateLUT::instance()) + : data_part(data_part_) + , log(&Poco::Logger::get(storage_.getLogName() + " (TTLBlockInputStream)")) { children.push_back(input_); header = children.at(0)->getHeader(); + auto old_ttl_infos = data_part->ttl_infos; - const auto & storage_columns = metadata_snapshot->getColumns(); - const auto & column_defaults = storage_columns.getDefaults(); - - ASTPtr default_expr_list = std::make_shared(); - for (const auto & [name, _] : metadata_snapshot->getColumnTTLs()) + if (metadata_snapshot_->hasRowsTTL()) { - auto it = column_defaults.find(name); - if (it != column_defaults.end()) + const auto & rows_ttl = metadata_snapshot_->getRowsTTL(); + auto algorithm = std::make_unique( + rows_ttl, old_ttl_infos.table_ttl, current_time_, force_); + + /// Skip all data if table ttl is expired for part + if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression) + all_data_dropped = true; + + delete_algorithm = algorithm.get(); + algorithms.emplace_back(std::move(algorithm)); + } + + for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs()) + algorithms.emplace_back(std::make_unique( + group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_)); + + if (metadata_snapshot_->hasAnyColumnTTL()) + { + const auto & storage_columns = metadata_snapshot_->getColumns(); + const auto & column_defaults = storage_columns.getDefaults(); + + for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs()) { - auto column = storage_columns.get(name); - auto expression = it->second.expression->clone(); - default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first)); + ExpressionActionsPtr default_expression; + auto it = column_defaults.find(name); + if (it != column_defaults.end()) + { + const auto & column = storage_columns.get(name); + auto default_ast = it->second.expression->clone(); + default_ast = setAlias(addTypeConversionToAST(std::move(default_ast), column.type->getName()), it->first); + + auto syntax_result = TreeRewriter(storage_.global_context).analyze(default_ast, metadata_snapshot_->getColumns().getAllPhysical()); + default_expression = ExpressionAnalyzer{default_ast, syntax_result, storage_.global_context}.getActions(true); + } + + algorithms.emplace_back(std::make_unique( + description, old_ttl_infos.columns_ttl[name], current_time_, force_, name, default_expression)); } } - for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl) - { - if (force || isTTLExpired(ttl_info.min)) - { - new_ttl_infos.columns_ttl.emplace(name, IMergeTreeDataPart::TTLInfo{}); - empty_columns.emplace(name); - } - else - new_ttl_infos.columns_ttl.emplace(name, ttl_info); - } + for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs()) + algorithms.emplace_back(std::make_unique( + move_ttl, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); - if (!force && !isTTLExpired(old_ttl_infos.table_ttl.min)) - new_ttl_infos.table_ttl = old_ttl_infos.table_ttl; - - if (!default_expr_list->children.empty()) - { - auto syntax_result = TreeRewriter(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical()); - defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true); - } - - auto storage_rows_ttl = 
metadata_snapshot->getRowsTTL(); - if (metadata_snapshot->hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY) - { - current_key_value.resize(storage_rows_ttl.group_by_keys.size()); - - ColumnNumbers keys; - for (const auto & key : storage_rows_ttl.group_by_keys) - keys.push_back(header.getPositionByName(key)); - agg_key_columns.resize(storage_rows_ttl.group_by_keys.size()); - - AggregateDescriptions aggregates = storage_rows_ttl.aggregate_descriptions; - for (auto & descr : aggregates) - if (descr.arguments.empty()) - for (const auto & name : descr.argument_names) - descr.arguments.push_back(header.getPositionByName(name)); - - agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size()); - const Settings & settings = storage.global_context.getSettingsRef(); - - Aggregator::Params params(header, keys, aggregates, - false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0, - settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - storage.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); - aggregator = std::make_unique(params); - } -} - -bool TTLBlockInputStream::isTTLExpired(time_t ttl) const -{ - return (ttl && (ttl <= current_time)); + for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs()) + algorithms.emplace_back(std::make_unique( + recompression_ttl, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); } Block reorderColumns(Block block, const Block & header) @@ -114,325 +92,30 @@ Block reorderColumns(Block block, const Block & header) Block TTLBlockInputStream::readImpl() { - /// Skip all data if table ttl is expired for part - auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); - if (metadata_snapshot->hasRowsTTL() - && !storage_rows_ttl.where_expression - && storage_rows_ttl.mode != TTLMode::GROUP_BY - && isTTLExpired(old_ttl_infos.table_ttl.max)) - { - rows_removed = data_part->rows_count; + if (all_data_dropped) return {}; - } - Block block = children.at(0)->read(); + auto block = children.at(0)->read(); + for (const auto & algorithm : algorithms) + algorithm->execute(block); + if (!block) - { - if (aggregator && !agg_result.empty()) - { - MutableColumns result_columns = header.cloneEmptyColumns(); - finalizeAggregates(result_columns); - block = header.cloneWithColumns(std::move(result_columns)); - } - return block; - } - - if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min))) - executeRowsTTL(block); - - removeValuesWithExpiredColumnTTL(block); - updateMovesTTL(block); - updateRecompressionTTL(block); return reorderColumns(std::move(block), header); } void TTLBlockInputStream::readSuffixImpl() { - for (const auto & elem : new_ttl_infos.columns_ttl) - new_ttl_infos.updatePartMinMaxTTL(elem.second.min, elem.second.max); + data_part->ttl_infos = {}; + for (const auto & algorithm : algorithms) + algorithm->finalize(data_part); - new_ttl_infos.updatePartMinMaxTTL(new_ttl_infos.table_ttl.min, new_ttl_infos.table_ttl.max); - - data_part->ttl_infos = std::move(new_ttl_infos); - data_part->expired_columns = std::move(empty_columns); - - if (rows_removed) + if (delete_algorithm) + { + size_t rows_removed = all_data_dropped ? 
data_part->rows_count : delete_algorithm->getNumberOfRemovedRows(); LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name); -} - -static ColumnPtr extractRequieredColumn(const ExpressionActions & expression, const Block & block, const String & result_column) -{ - if (block.has(result_column)) - return block.getByName(result_column).column; - - Block block_copy; - for (const auto & column_name : expression.getRequiredColumns()) - block_copy.insert(block.getByName(column_name)); - - expression.execute(block_copy); - return block_copy.getByName(result_column).column; -} - -void TTLBlockInputStream::executeRowsTTL(Block & block) -{ - auto rows_ttl = metadata_snapshot->getRowsTTL(); - auto ttl_column = extractRequieredColumn(*rows_ttl.expression, block, rows_ttl.result_column); - - auto where_result_column = rows_ttl.where_expression ? - extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column): nullptr; - - if (aggregator) - aggregateRowsWithExpiredTTL(block, ttl_column, where_result_column); - else - removeRowsWithExpiredTTL(block, ttl_column, where_result_column); -} - -void TTLBlockInputStream::removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column) -{ - MutableColumns result_columns; - const auto & column_names = header.getNames(); - - result_columns.reserve(column_names.size()); - for (auto it = column_names.begin(); it != column_names.end(); ++it) - { - const IColumn * values_column = block.getByName(*it).column.get(); - MutableColumnPtr result_column = values_column->cloneEmpty(); - result_column->reserve(block.rows()); - - for (size_t i = 0; i < block.rows(); ++i) - { - UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - bool where_filter_passed = !where_column || where_column->getBool(i); - if (!isTTLExpired(cur_ttl) || !where_filter_passed) - { - new_ttl_infos.table_ttl.update(cur_ttl); - result_column->insertFrom(*values_column, i); - } - else if (it == column_names.begin()) - ++rows_removed; - } - - result_columns.emplace_back(std::move(result_column)); - } - - block = header.cloneWithColumns(std::move(result_columns)); -} - -void TTLBlockInputStream::aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column) -{ - const auto & column_names = header.getNames(); - MutableColumns result_columns = header.cloneEmptyColumns(); - MutableColumns aggregate_columns = header.cloneEmptyColumns(); - - size_t rows_aggregated = 0; - size_t current_key_start = 0; - size_t rows_with_current_key = 0; - auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); - - for (size_t i = 0; i < block.rows(); ++i) - { - UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - bool where_filter_passed = !where_column || where_column->getBool(i); - bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; - - bool same_as_current = true; - for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j) - { - const String & key_column = storage_rows_ttl.group_by_keys[j]; - const IColumn * values_column = block.getByName(key_column).column.get(); - if (!same_as_current || (*values_column)[i] != current_key_value[j]) - { - values_column->get(i, current_key_value[j]); - same_as_current = false; - } - } - - if (!same_as_current) - { - if (rows_with_current_key) - calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); - finalizeAggregates(result_columns); - - current_key_start = rows_aggregated; - rows_with_current_key = 0; - } - - if 
(ttl_expired) - { - ++rows_with_current_key; - ++rows_aggregated; - for (const auto & name : column_names) - { - const IColumn * values_column = block.getByName(name).column.get(); - auto & column = aggregate_columns[header.getPositionByName(name)]; - column->insertFrom(*values_column, i); - } - } - else - { - new_ttl_infos.table_ttl.update(cur_ttl); - for (const auto & name : column_names) - { - const IColumn * values_column = block.getByName(name).column.get(); - auto & column = result_columns[header.getPositionByName(name)]; - column->insertFrom(*values_column, i); - } - } - } - - if (rows_with_current_key) - calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); - - block = header.cloneWithColumns(std::move(result_columns)); -} - -void TTLBlockInputStream::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length) -{ - Columns aggregate_chunk; - aggregate_chunk.reserve(aggregate_columns.size()); - for (const auto & name : header.getNames()) - { - const auto & column = aggregate_columns[header.getPositionByName(name)]; - ColumnPtr chunk_column = column->cut(start_pos, length); - aggregate_chunk.emplace_back(std::move(chunk_column)); - } - aggregator->executeOnBlock(aggregate_chunk, length, agg_result, agg_key_columns, - agg_aggregate_columns, agg_no_more_keys); -} - -void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns) -{ - if (!agg_result.empty()) - { - auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1); - auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); - for (auto & agg_block : aggregated_res) - { - for (const auto & it : storage_rows_ttl.set_parts) - it.expression->execute(agg_block); - - for (const auto & name : storage_rows_ttl.group_by_keys) - { - const IColumn * values_column = agg_block.getByName(name).column.get(); - auto & result_column = result_columns[header.getPositionByName(name)]; - result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); - } - - for (const auto & it : storage_rows_ttl.set_parts) - { - const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get(); - auto & result_column = result_columns[header.getPositionByName(it.column_name)]; - result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); - } - } - } - - agg_result.invalidate(); -} - -void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) -{ - Block block_with_defaults; - if (defaults_expression) - { - block_with_defaults = block; - defaults_expression->execute(block_with_defaults); - } - - for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) - { - /// If we read not all table columns. E.g. while mutation. 
- if (!block.has(name)) - continue; - - const auto & old_ttl_info = old_ttl_infos.columns_ttl[name]; - auto & new_ttl_info = new_ttl_infos.columns_ttl[name]; - - /// Nothing to do - if (!force && !isTTLExpired(old_ttl_info.min)) - continue; - - /// Later drop full column - if (isTTLExpired(old_ttl_info.max)) - continue; - - auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column); - - ColumnPtr default_column = nullptr; - if (block_with_defaults.has(name)) - default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst(); - - auto & column_with_type = block.getByName(name); - const IColumn * values_column = column_with_type.column.get(); - MutableColumnPtr result_column = values_column->cloneEmpty(); - result_column->reserve(block.rows()); - - for (size_t i = 0; i < block.rows(); ++i) - { - UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - if (isTTLExpired(cur_ttl)) - { - if (default_column) - result_column->insertFrom(*default_column, i); - else - result_column->insertDefault(); - } - else - { - new_ttl_info.update(cur_ttl); - empty_columns.erase(name); - result_column->insertFrom(*values_column, i); - } - } - column_with_type.column = std::move(result_column); } } -void TTLBlockInputStream::updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map) -{ - for (const auto & ttl_entry : descriptions) - { - auto & new_ttl_info = ttl_info_map[ttl_entry.result_column]; - if (!block.has(ttl_entry.result_column)) - ttl_entry.expression->execute(block); - - auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column); - - for (size_t i = 0; i < block.rows(); ++i) - { - UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - new_ttl_info.update(cur_ttl); - } - } -} - -void TTLBlockInputStream::updateMovesTTL(Block & block) -{ - updateTTLWithDescriptions(block, metadata_snapshot->getMoveTTLs(), new_ttl_infos.moves_ttl); -} - -void TTLBlockInputStream::updateRecompressionTTL(Block & block) -{ - updateTTLWithDescriptions(block, metadata_snapshot->getRecompressionTTLs(), new_ttl_infos.recompression_ttl); -} - -UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind) -{ - if (const ColumnUInt16 * column_date = typeid_cast(column)) - return date_lut.fromDayNum(DayNum(column_date->getData()[ind])); - else if (const ColumnUInt32 * column_date_time = typeid_cast(column)) - return column_date_time->getData()[ind]; - else if (const ColumnConst * column_const = typeid_cast(column)) - { - if (typeid_cast(&column_const->getDataColumn())) - return date_lut.fromDayNum(DayNum(column_const->getValue())); - else if (typeid_cast(&column_const->getDataColumn())) - return column_const->getValue(); - } - - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); -} - } diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index bbe1f8782a4..da86b8d5710 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -3,8 +3,9 @@ #include #include #include -#include #include +#include +#include #include @@ -24,7 +25,6 @@ public: ); String getName() const override { return "TTL"; } - Block getHeader() const override { return header; } protected: @@ -34,65 +34,14 @@ protected: void readSuffixImpl() override; private: - const MergeTreeData & storage; - StorageMetadataPtr metadata_snapshot; + std::vector algorithms; + const TTLDeleteAlgorithm * 
delete_algorithm = nullptr; + bool all_data_dropped = false; /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; - - time_t current_time; - bool force; - - std::unique_ptr aggregator; - std::vector current_key_value; - AggregatedDataVariants agg_result; - ColumnRawPtrs agg_key_columns; - Aggregator::AggregateColumns agg_aggregate_columns; - bool agg_no_more_keys = false; - - IMergeTreeDataPart::TTLInfos old_ttl_infos; - IMergeTreeDataPart::TTLInfos new_ttl_infos; - NameSet empty_columns; - - size_t rows_removed = 0; Poco::Logger * log; - const DateLUTImpl & date_lut; - - /// TODO rewrite defaults logic to evaluteMissingDefaults - std::unordered_map defaults_result_column; - ExpressionActionsPtr defaults_expression; - Block header; -private: - /// Removes values with expired ttl and computes new_ttl_infos and empty_columns for part - void removeValuesWithExpiredColumnTTL(Block & block); - - void executeRowsTTL(Block & block); - - /// Removes rows with expired table ttl and computes new ttl_infos for part - void removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column); - - /// Aggregates rows with expired table ttl and computes new ttl_infos for part - void aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column); - - // Calculate aggregates of aggregate_columns into agg_result - void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length); - - /// Finalize agg_result into result_columns - void finalizeAggregates(MutableColumns & result_columns); - - /// Execute description expressions on block and update ttl's in - /// ttl_info_map with expression results. - void updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map); - - /// Updates TTL for moves - void updateMovesTTL(Block & block); - - /// Update values for recompression TTL using data from block. - void updateRecompressionTTL(Block & block); - - UInt32 getTimestampByIndex(const IColumn * column, size_t ind); - bool isTTLExpired(time_t ttl) const; }; } diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp new file mode 100644 index 00000000000..4747a605e3b --- /dev/null +++ b/src/DataStreams/TTLColumnAlgorithm.cpp @@ -0,0 +1,88 @@ +#include + +namespace DB +{ + +TTLColumnAlgorithm::TTLColumnAlgorithm( + const TTLDescription & description_, + const TTLInfo & old_ttl_info_, + time_t current_time_, + bool force_, + const String & column_name_, + const ExpressionActionsPtr & default_expression_) + : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + , column_name(column_name_) + , default_expression(default_expression_) +{ + if (!isMinTTLExpired()) + { + new_ttl_info = old_ttl_info; + is_fully_empty = false; + } +} + +void TTLColumnAlgorithm::execute(Block & block) +{ + if (!block) + return; + + + /// If we read not all table columns. E.g. while mutation. 
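+    /// In that case the block is passed through unchanged and the TTL info
+    /// for this column is not recalculated on this block.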
+ if (!block.has(column_name)) + return; + + /// Nothing to do + if (!isMinTTLExpired()) + return; + + /// Later drop full column + if (isMaxTTLExpired()) + return; + + //// TODO: use extractRequiredColumn + ColumnPtr default_column; + if (default_expression) + { + Block block_with_defaults; + block_with_defaults = block; + default_expression->execute(block_with_defaults); + default_column = block_with_defaults.getByName(column_name).column->convertToFullColumnIfConst(); + } + + auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); + + auto & column_with_type = block.getByName(column_name); + const IColumn * values_column = column_with_type.column.get(); + MutableColumnPtr result_column = values_column->cloneEmpty(); + result_column->reserve(block.rows()); + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); + if (isTTLExpired(cur_ttl)) + { + if (default_column) + result_column->insertFrom(*default_column, i); + else + result_column->insertDefault(); + } + else + { + new_ttl_info.update(cur_ttl); + is_fully_empty = false; + result_column->insertFrom(*values_column, i); + } + } + + column_with_type.column = std::move(result_column); +} + +void TTLColumnAlgorithm::finalize(const MutableDataPartPtr & data_part) const +{ + data_part->ttl_infos.columns_ttl[column_name] = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); + if (is_fully_empty) + data_part->expired_columns.insert(column_name); +} + +} diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/DataStreams/TTLColumnAlgorithm.h new file mode 100644 index 00000000000..b2824dba9b0 --- /dev/null +++ b/src/DataStreams/TTLColumnAlgorithm.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace DB +{ + +class TTLColumnAlgorithm final : public ITTLAlgorithm +{ +public: + TTLColumnAlgorithm( + const TTLDescription & description_, + const TTLInfo & old_ttl_info_, + time_t current_time_, + bool force_, + const String & column_name_, + const ExpressionActionsPtr & default_expression_); + + void execute(Block & block) override; + void finalize(const MutableDataPartPtr & data_part) const override; + +private: + const String column_name; + const ExpressionActionsPtr default_expression; + + bool is_fully_empty = true; +}; + +} diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp new file mode 100644 index 00000000000..9ff4eb767df --- /dev/null +++ b/src/DataStreams/TTLDeleteAlgorithm.cpp @@ -0,0 +1,58 @@ +#include + +namespace DB +{ + +TTLDeleteAlgorithm::TTLDeleteAlgorithm( + const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) +{ + if (!isMinTTLExpired()) + new_ttl_info = old_ttl_info; +} + +void TTLDeleteAlgorithm::execute(Block & block) +{ + if (!block || !isMinTTLExpired()) + return; + + auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); + auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column); + + MutableColumns result_columns; + const auto & column_names = block.getNames(); + + result_columns.reserve(column_names.size()); + for (auto it = column_names.begin(); it != column_names.end(); ++it) + { + const IColumn * values_column = block.getByName(*it).column.get(); + MutableColumnPtr result_column = values_column->cloneEmpty(); + 
result_column->reserve(block.rows()); + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); + bool where_filter_passed = !where_column || where_column->getBool(i); + + if (!isTTLExpired(cur_ttl) || !where_filter_passed) + { + new_ttl_info.update(cur_ttl); + result_column->insertFrom(*values_column, i); + } + else if (it == column_names.begin()) + ++rows_removed; + } + + result_columns.emplace_back(std::move(result_column)); + } + + block = block.cloneWithColumns(std::move(result_columns)); +} + +void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const +{ + data_part->ttl_infos.table_ttl = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); +} + +} diff --git a/src/DataStreams/TTLDeleteAlgorithm.h b/src/DataStreams/TTLDeleteAlgorithm.h new file mode 100644 index 00000000000..36da59da46e --- /dev/null +++ b/src/DataStreams/TTLDeleteAlgorithm.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace DB +{ + +class TTLDeleteAlgorithm final : public ITTLAlgorithm +{ +public: + TTLDeleteAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + + void execute(Block & block) override; + void finalize(const MutableDataPartPtr & data_part) const override; + size_t getNumberOfRemovedRows() const { return rows_removed; } + +private: + size_t rows_removed = 0; +}; + +} diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp new file mode 100644 index 00000000000..ce4d4128eec --- /dev/null +++ b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp @@ -0,0 +1,47 @@ +#include + +namespace DB +{ + +TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm( + const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) +{ +} + +void TTLUpdateInfoAlgorithm::execute(Block & block) +{ + if (!block) + return; + + auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i); + new_ttl_info.update(cur_ttl); + } +} + +TTLMoveAlgorithm::TTLMoveAlgorithm( + const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_) +{ +} + +void TTLMoveAlgorithm::finalize(const MutableDataPartPtr & data_part) const +{ + data_part->ttl_infos.moves_ttl[description.result_column] = new_ttl_info; +} + +TTLRecompressionAlgorithm::TTLRecompressionAlgorithm( + const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + : TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_) +{ +} + +void TTLRecompressionAlgorithm::finalize(const MutableDataPartPtr & data_part) const +{ + data_part->ttl_infos.recompression_ttl[description.result_column] = new_ttl_info; +} + +} diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.h b/src/DataStreams/TTLUpdateInfoAlgorithm.h new file mode 100644 index 00000000000..4a680c5bb3a --- /dev/null +++ b/src/DataStreams/TTLUpdateInfoAlgorithm.h @@ -0,0 +1,31 @@ +#pragma once + +#include + +namespace DB +{ + +class TTLUpdateInfoAlgorithm : public ITTLAlgorithm +{ +public: + TTLUpdateInfoAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, 
time_t current_time_, bool force_); + + void execute(Block & block) override; + void finalize(const MutableDataPartPtr & data_part) const override = 0; +}; + +class TTLMoveAlgorithm final : public TTLUpdateInfoAlgorithm +{ +public: + TTLMoveAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + void finalize(const MutableDataPartPtr & data_part) const override; +}; + +class TTLRecompressionAlgorithm final : public TTLUpdateInfoAlgorithm +{ +public: + TTLRecompressionAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + void finalize(const MutableDataPartPtr & data_part) const override; +}; + +} diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index 858bf7081e7..b0a7755c7f9 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -27,6 +27,7 @@ SRCS( ExecutionSpeedLimits.cpp ExpressionBlockInputStream.cpp IBlockInputStream.cpp + ITTLAlgorithm.cpp InputStreamFromASTInsertQuery.cpp InternalTextLogsRowOutputStream.cpp LimitBlockInputStream.cpp @@ -44,7 +45,11 @@ SRCS( SquashingBlockInputStream.cpp SquashingBlockOutputStream.cpp SquashingTransform.cpp + TTLAggregationAlgorithm.cpp TTLBlockInputStream.cpp + TTLColumnAlgorithm.cpp + TTLDeleteAlgorithm.cpp + TTLUpdateInfoAlgorithm.cpp copyData.cpp finalizeBlock.cpp materializeBlock.cpp diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 104eedf060e..cdf66ec43f6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1136,6 +1136,12 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada return false; } + for (const auto & group_by_desc : metadata_snapshot->getGroupByTTLs()) + { + if (!ttl_infos.group_by_ttl.count(group_by_desc.result_column)) + return false; + } + return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 92c8a66e828..3a0bb283b63 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -17,13 +17,17 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i updatePartMinMaxTTL(ttl_info.min, ttl_info.max); } + for (const auto & [name, ttl_info] : other_infos.group_by_ttl) + { + group_by_ttl[name].update(ttl_info); + updatePartMinMaxTTL(ttl_info.min, ttl_info.max); + } + for (const auto & [name, ttl_info] : other_infos.recompression_ttl) recompression_ttl[name].update(ttl_info); for (const auto & [expression, ttl_info] : other_infos.moves_ttl) - { moves_ttl[expression].update(ttl_info); - } table_ttl.update(other_infos.table_ttl); updatePartMinMaxTTL(table_ttl.min, table_ttl.max); @@ -59,29 +63,33 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) updatePartMinMaxTTL(table_ttl.min, table_ttl.max); } + + auto fill_ttl_info_map = [](const JSON & json_part, TTLInfoMap & ttl_info_map) + { + for (auto elem : json_part) // NOLINT + { + MergeTreeDataPartTTLInfo ttl_info; + ttl_info.min = elem["min"].getUInt(); + ttl_info.max = elem["max"].getUInt(); + String expression = elem["expression"].getString(); + ttl_info_map.emplace(expression, ttl_info); + } + }; + if (json.has("moves")) { const JSON & moves = json["moves"]; - for (auto move : moves) // NOLINT - { - MergeTreeDataPartTTLInfo ttl_info; - ttl_info.min = move["min"].getUInt(); - ttl_info.max = 
move["max"].getUInt(); - String expression = move["expression"].getString(); - moves_ttl.emplace(expression, ttl_info); - } + fill_ttl_info_map(moves, moves_ttl); } if (json.has("recompression")) { const JSON & recompressions = json["recompression"]; - for (auto recompression : recompressions) // NOLINT - { - MergeTreeDataPartTTLInfo ttl_info; - ttl_info.min = recompression["min"].getUInt(); - ttl_info.max = recompression["max"].getUInt(); - String expression = recompression["expression"].getString(); - recompression_ttl.emplace(expression, ttl_info); - } + fill_ttl_info_map(recompressions, recompression_ttl); + } + if (json.has("group_by")) + { + const JSON & group_by = json["group_by"]; + fill_ttl_info_map(group_by, group_by_ttl); } } @@ -118,6 +126,18 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const writeIntText(table_ttl.max, out); writeString("}", out); } + + auto write_info_for_expression = [&out](const auto & name, const auto & info) + { + writeString(R"({"expression":)", out); + writeString(doubleQuoteString(name), out); + writeString(R"(,"min":)", out); + writeIntText(info.min, out); + writeString(R"(,"max":)", out); + writeIntText(info.max, out); + writeString("}", out); + }; + if (!moves_ttl.empty()) { if (!columns_ttl.empty() || table_ttl.min) @@ -128,13 +148,7 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const if (it != moves_ttl.begin()) writeString(",", out); - writeString(R"({"expression":)", out); - writeString(doubleQuoteString(it->first), out); - writeString(R"(,"min":)", out); - writeIntText(it->second.min, out); - writeString(R"(,"max":)", out); - writeIntText(it->second.max, out); - writeString("}", out); + write_info_for_expression(it->first, it->second); } writeString("]", out); } @@ -149,13 +163,22 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const if (it != recompression_ttl.begin()) writeString(",", out); - writeString(R"({"expression":)", out); - writeString(doubleQuoteString(it->first), out); - writeString(R"(,"min":)", out); - writeIntText(it->second.min, out); - writeString(R"(,"max":)", out); - writeIntText(it->second.max, out); - writeString("}", out); + write_info_for_expression(it->first, it->second); + } + writeString("]", out); + } + if (!group_by_ttl.empty()) + { + if (!moves_ttl.empty() || !columns_ttl.empty() || !recompression_ttl.empty() || table_ttl.min) + writeString(",", out); + + writeString(R"("group_by":[)", out); + for (auto it = group_by_ttl.begin(); it != group_by_ttl.end(); ++it) + { + if (it != group_by_ttl.begin()) + writeString(",", out); + + write_info_for_expression(it->first, it->second); } writeString("]", out); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index 17239e2618a..8ab6d6089db 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -49,10 +49,11 @@ struct MergeTreeDataPartTTLInfos TTLInfoMap recompression_ttl; + TTLInfoMap group_by_ttl; + /// Return the smallest max recompression TTL value time_t getMinimalMaxRecompressionTTL() const; - void read(ReadBuffer & in); void write(WriteBuffer & out) const; void update(const MergeTreeDataPartTTLInfos & other_infos); @@ -68,7 +69,7 @@ struct MergeTreeDataPartTTLInfos bool empty() const { - return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty(); + return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty() && group_by_ttl.empty(); } }; diff --git 
a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index e1284fe8d92..68c409eb85c 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -376,6 +376,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
     if (metadata_snapshot->hasRowsTTL())
         updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
 
+    for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
+        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
+
     for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
         updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
 
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp
index a4500e2aa7b..463a7c3b382 100644
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@@ -125,7 +125,7 @@ TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const
 
 bool StorageInMemoryMetadata::hasAnyTableTTL() const
 {
-    return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL();
+    return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL();
 }
 
 TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const
@@ -168,6 +168,16 @@ bool StorageInMemoryMetadata::hasAnyRecompressionTTL() const
     return !table_ttl.recompression_ttl.empty();
 }
 
+TTLDescriptions StorageInMemoryMetadata::getGroupByTTLs() const
+{
+    return table_ttl.group_by_ttl;
+}
+
+bool StorageInMemoryMetadata::hasAnyGroupByTTL() const
+{
+    return !table_ttl.group_by_ttl.empty();
+}
+
 ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns) const
 {
     if (updated_columns.empty())
diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h
index 3656edf71f4..cf9f38fe135 100644
--- a/src/Storages/StorageInMemoryMetadata.h
+++ b/src/Storages/StorageInMemoryMetadata.h
@@ -118,6 +118,10 @@ struct StorageInMemoryMetadata
     TTLDescriptions getRecompressionTTLs() const;
     bool hasAnyRecompressionTTL() const;
 
+    /// Just a wrapper for table TTLs, returns info about GROUP BY TTL
+    TTLDescriptions getGroupByTTLs() const;
+    bool hasAnyGroupByTTL() const;
+
    /// Returns columns, which will be needed to calculate dependencies (skip
    /// indices, TTL expressions) if we update @updated_columns set of columns.
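The hunks above only thread GROUP BY TTL descriptions through the part writer and the in-memory metadata; a minimal sketch of the kind of table definition that exercises this path is shown below. The database, table and column names are illustrative and not taken from the patch, and the `system.parts` columns queried are the ones a later hunk in this series adds:

``` sql
-- Expired rows are aggregated by the primary-key prefix instead of being deleted.
CREATE TABLE test.events_rollup
(
    d DateTime,
    k1 UInt32,
    k2 UInt32,
    hits UInt64
)
ENGINE = MergeTree
ORDER BY (k1, k2)
TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET hits = sum(hits);

-- Per-part GROUP BY TTL ranges become visible through the new system.parts columns.
SELECT name, group_by_ttl_info.expression, group_by_ttl_info.min, group_by_ttl_info.max
FROM system.parts
WHERE database = 'test' AND table = 'events_rollup' AND active;
```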
ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 7ae20ed024e..d890551893c 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -68,6 +68,10 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + + {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared())} } ) { @@ -184,6 +188,7 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto columns_[i++]->insert(queryToString(part->default_codec->getCodecDesc())); add_ttl_info_map(part->ttl_infos.recompression_ttl); + add_ttl_info_map(part->ttl_infos.group_by_ttl); } } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index c2c5898c70c..d8731dd4ab3 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -259,6 +259,7 @@ TTLTableDescription::TTLTableDescription(const TTLTableDescription & other) , rows_ttl(other.rows_ttl) , move_ttl(other.move_ttl) , recompression_ttl(other.recompression_ttl) + , group_by_ttl(other.group_by_ttl) { } @@ -275,6 +276,7 @@ TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription & rows_ttl = other.rows_ttl; move_ttl = other.move_ttl; recompression_ttl = other.recompression_ttl; + group_by_ttl = other.group_by_ttl; return *this; } @@ -295,7 +297,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( for (const auto & ttl_element_ptr : definition_ast->children) { auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key); - if (ttl.mode == TTLMode::DELETE || ttl.mode == TTLMode::GROUP_BY) + if (ttl.mode == TTLMode::DELETE) { if (seen_delete_ttl) throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); @@ -306,6 +308,10 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( { result.recompression_ttl.emplace_back(std::move(ttl)); } + else if (ttl.mode == TTLMode::GROUP_BY) + { + result.group_by_ttl.emplace_back(std::move(ttl)); + } else { result.move_ttl.emplace_back(std::move(ttl)); diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 4b0d4370a70..1cc3a832447 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -107,6 +107,8 @@ struct TTLTableDescription TTLDescriptions recompression_ttl; + TTLDescriptions group_by_ttl; + TTLTableDescription() = default; TTLTableDescription(const TTLTableDescription & other); TTLTableDescription & operator=(const TTLTableDescription & other); From 51cfbe8c2ec5970934438df8be4d16651458fa8c Mon Sep 17 00:00:00 2001 From: alfredlu Date: Sun, 27 Dec 2020 18:35:46 +0800 Subject: [PATCH 027/697] Add log_comment setting --- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b09e960da36..fb2e13304d3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -324,6 +324,7 @@ class IColumn; M(Bool, log_profile_events, true, "Log query 
performance statistics into the query_log and query_thread_log.", 0) \ M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \ M(Bool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \ + M(String, log_comment, "comment: ", "Log comment format", 0) \ M(LogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ M(Bool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \ M(Bool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index a2ae96e8199..8b3a56a5a97 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -158,6 +158,13 @@ static void logQuery(const String & query, const Context & context, bool interna const auto & initial_query_id = client_info.initial_query_id; const auto & current_user = client_info.current_user; + const Settings & settings = context.getSettingsRef(); + const auto & log_comment = settings.log_comment; + if (!log_comment.toString().empty()) + { + query = query + log_comment; + } + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}, using {} parser) {}", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), @@ -171,6 +178,22 @@ static void logQuery(const String & query, const Context & context, bool interna "OpenTelemetry traceparent '{}'", client_info.client_trace_context.composeTraceparentHeader()); } + + QueryLogElement elem; + + elem.type = QueryLogElementType::QUERY_START; + elem.event_time = current_time_us / 1000000; + elem.event_time_microseconds = current_time_us; + elem.query_start_time = current_time_us / 1000000; + elem.query_start_time_microseconds = current_time_us; + + elem.current_database = context.getCurrentDatabase(); + elem.query = query; + elem.normalized_query_hash = normalizedQueryHash(query); + + elem.client_info = client_info; + if (auto query_log = context.getQueryLog()) + query_log->add(elem); } } From 0c7b1518008812263f8e6f4c2a17c2360c8877f1 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Mon, 28 Dec 2020 00:54:24 +0400 Subject: [PATCH 028/697] Revisit mapped role management --- src/Access/LDAPAccessStorage.cpp | 226 +++++++++++++++---------------- src/Access/LDAPAccessStorage.h | 16 ++- 2 files changed, 122 insertions(+), 120 deletions(-) diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index e86a82c1ac2..a787c704999 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -102,10 +102,11 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m role_search_params.swap(role_search_params_cfg); common_role_names.swap(common_roles_cfg); + external_role_hashes.clear(); users_per_roles.clear(); + roles_per_users.clear(); granted_role_names.clear(); granted_role_ids.clear(); - external_role_hashes.clear(); role_change_subscription = access_control_manager->subscribeForChanges( [this] (const UUID & id, const AccessEntityPtr & entity) @@ -113,46 +114,37 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m return this->processRoleChange(id, entity); } ); - - // Update granted_role_* with 
the initial values: resolved ids of roles from common_role_names. - for (const auto & role_name : common_role_names) - { - if (const auto role_id = access_control_manager->find(role_name)) - { - granted_role_names.insert_or_assign(*role_id, role_name); - granted_role_ids.insert_or_assign(role_name, *role_id); - } - } } void LDAPAccessStorage::processRoleChange(const UUID & id, const AccessEntityPtr & entity) { std::scoped_lock lock(mutex); - auto role = typeid_cast>(entity); + const auto role = typeid_cast>(entity); const auto it = granted_role_names.find(id); - if (role) // Added or renamed role. + if (role) // Added or renamed a role. { const auto & new_role_name = role->getName(); - if (it != granted_role_names.end()) + if (it != granted_role_names.end()) // Renamed a granted role. { - // Revoke the old role if its name has been changed. const auto & old_role_name = it->second; if (new_role_name != old_role_name) { + // Revoke the old role first, then grant the new role. applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + applyRoleChangeNoLock(true /* grant */, id, new_role_name); } } - - // Grant the role. - applyRoleChangeNoLock(true /* grant */, id, new_role_name); - } - else // Removed role. - { - if (it != granted_role_names.end()) + else // Added a role. + { + applyRoleChangeNoLock(true /* grant */, id, new_role_name); + } + } + else // Removed a role. + { + if (it != granted_role_names.end()) // Removed a granted role. { - // Revoke the old role. const auto & old_role_name = it->second; applyRoleChangeNoLock(false /* revoke */, id, old_role_name); } @@ -164,7 +156,7 @@ void LDAPAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, { std::vector user_ids; - // Find relevant user ids. + // Build a list of ids of the relevant users. if (common_role_names.count(role_name)) { user_ids = memory_storage.findAll(); @@ -176,6 +168,7 @@ void LDAPAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, { const auto & user_names = it->second; user_ids.reserve(user_names.size()); + for (const auto & user_name : user_names) { if (const auto user_id = memory_storage.find(user_name)) @@ -184,7 +177,7 @@ void LDAPAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, } } - // Update relevant users' granted roles. + // Update the granted roles of the relevant users. if (!user_ids.empty()) { auto update_func = [&role_id, &grant] (const AccessEntityPtr & entity_) -> AccessEntityPtr @@ -205,129 +198,135 @@ void LDAPAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, }; memory_storage.update(user_ids, update_func); + } - if (grant) + // Actualize granted_role_* mappings. 
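The grant path below resolves each common or mapped role name through the access control manager and only logs "role not found" when no matching local role exists, so roles that should be granted to LDAP users still have to be defined in ClickHouse itself. A minimal sketch of such a role; the role and database names are hypothetical and not part of the patch:

``` sql
-- A local role whose name matches a value produced by the LDAP role mapping;
-- if it does not exist, LDAPAccessStorage skips it and logs a warning instead.
CREATE ROLE ldap_developers;
GRANT SELECT ON analytics.* TO ldap_developers;
```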
+ if (grant) + { + if (!user_ids.empty()) { granted_role_names.insert_or_assign(role_id, role_name); granted_role_ids.insert_or_assign(role_name, role_id); } - else - { - granted_role_names.erase(role_id); - granted_role_ids.erase(role_name); - } + } + else + { + granted_role_names.erase(role_id); + granted_role_ids.erase(role_name); } } -void LDAPAccessStorage::grantRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const +void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const +{ + const auto external_roles_hash = boost::hash{}(external_roles); + return assignRolesNoLock(user, external_roles, external_roles_hash); +} + + +void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPSearchResultsList & external_roles, const std::size_t external_roles_hash) const { const auto & user_name = user.getName(); - const auto new_hash = boost::hash{}(external_roles); auto & granted_roles = user.granted_roles.roles; + const auto local_role_names = mapExternalRolesNoLock(external_roles); - // Map external role names to local role names. - const auto user_role_names = mapExternalRolesNoLock(external_roles); - - external_role_hashes.erase(user_name); - granted_roles.clear(); - - // Grant the common roles. - - // Initially, all the available ids of common roles were resolved in setConfiguration(), - // and, then, maintained by processRoleChange(), so here we just grant those that exist (i.e., resolved). - for (const auto & role_name : common_role_names) + auto grant_role = [this, &user_name, &granted_roles] (const String & role_name, const bool common) { - const auto it = granted_role_ids.find(role_name); - if (it == granted_role_ids.end()) - { - LOG_WARNING(getLogger(), "Unable to grant common role '{}' to user '{}': role not found", role_name, user_name); - } - else - { - const auto & role_id = it->second; - granted_roles.insert(role_id); - } - } - - // Grant the mapped external roles. - - // Cleanup helper relations. - for (auto it = users_per_roles.begin(); it != users_per_roles.end();) - { - const auto & role_name = it->first; - auto & user_names = it->second; - if (user_role_names.count(role_name) == 0) - { - user_names.erase(user_name); - if (user_names.empty()) - { - if (common_role_names.count(role_name) == 0) - { - auto rit = granted_role_ids.find(role_name); - if (rit != granted_role_ids.end()) - { - granted_role_names.erase(rit->second); - granted_role_ids.erase(rit); - } - } - users_per_roles.erase(it++); - } - else - { - ++it; - } - } - else - { - ++it; - } - } - - // Resolve and assign mapped external role ids. - for (const auto & role_name : user_role_names) - { - users_per_roles[role_name].insert(user_name); - const auto it = granted_role_ids.find(role_name); + auto it = granted_role_ids.find(role_name); if (it == granted_role_ids.end()) { if (const auto role_id = access_control_manager->find(role_name)) { - granted_roles.insert(*role_id); granted_role_names.insert_or_assign(*role_id, role_name); - granted_role_ids.insert_or_assign(role_name, *role_id); - } - else - { - LOG_WARNING(getLogger(), "Unable to grant mapped role '{}' to user '{}': role not found", role_name, user_name); + it = granted_role_ids.insert_or_assign(role_name, *role_id).first; } } - else + + if (it != granted_role_ids.end()) { const auto & role_id = it->second; granted_roles.insert(role_id); } + else + { + LOG_WARNING(getLogger(), "Unable to grant {} role '{}' to user '{}': role not found", (common ? 
"common" : "mapped"), role_name, user_name); + } + }; + + external_role_hashes.erase(user_name); + granted_roles.clear(); + const auto old_role_names = std::move(roles_per_users[user_name]); + + // Grant the common roles first. + for (const auto & role_name : common_role_names) + { + grant_role(role_name, true /* common */); } - external_role_hashes[user_name] = new_hash; + // Grant the mapped external roles and actualize users_per_roles mapping. + // local_role_names allowed to overlap with common_role_names. + for (const auto & role_name : local_role_names) + { + grant_role(role_name, false /* mapped */); + users_per_roles[role_name].insert(user_name); + } + + // Cleanup users_per_roles and granted_role_* mappings. + for (const auto & old_role_name : old_role_names) + { + if (local_role_names.count(old_role_name)) + continue; + + const auto rit = users_per_roles.find(old_role_name); + if (rit == users_per_roles.end()) + continue; + + auto & user_names = rit->second; + user_names.erase(user_name); + + if (!user_names.empty()) + continue; + + users_per_roles.erase(rit); + + if (common_role_names.count(old_role_name)) + continue; + + const auto iit = granted_role_ids.find(old_role_name); + if (iit == granted_role_ids.end()) + continue; + + const auto old_role_id = iit->second; + granted_role_names.erase(old_role_id); + granted_role_ids.erase(iit); + } + + // Actualize roles_per_users mapping and external_role_hashes cache. + if (local_role_names.empty()) + roles_per_users.erase(user_name); + else + roles_per_users[user_name] = std::move(local_role_names); + + external_role_hashes[user_name] = external_roles_hash; } -void LDAPAccessStorage::updateRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const +void LDAPAccessStorage::updateAssignedRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const { - // common_role_names are not included since they don't change. - const auto new_hash = boost::hash{}(external_roles); + // No need to include common_role_names in this hash each time, since they don't change. + const auto external_roles_hash = boost::hash{}(external_roles); + // Map and grant the roles from scratch only if the list of external role has changed. const auto it = external_role_hashes.find(user_name); - if (it != external_role_hashes.end() && it->second == new_hash) + if (it != external_role_hashes.end() && it->second == external_roles_hash) return; - auto update_func = [this, &external_roles] (const AccessEntityPtr & entity_) -> AccessEntityPtr + auto update_func = [this, &external_roles, external_roles_hash] (const AccessEntityPtr & entity_) -> AccessEntityPtr { if (auto user = typeid_cast>(entity_)) { auto changed_user = typeid_cast>(user->clone()); - grantRolesNoLock(*changed_user, external_roles); + assignRolesNoLock(*changed_user, external_roles, external_roles_hash); return changed_user; } return entity_; @@ -529,7 +528,7 @@ UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & passw throwAddressNotAllowed(address); // Just in case external_roles are changed. This will be no-op if they are not. 
- updateRolesNoLock(*id, user_name, external_roles); + updateAssignedRolesNoLock(*id, user_name, external_roles); return *id; } @@ -547,7 +546,7 @@ UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & passw if (!isAddressAllowedImpl(*user, address)) throwAddressNotAllowed(address); - grantRolesNoLock(*user, external_roles); + assignRolesNoLock(*user, external_roles); return memory_storage.insert(user); } @@ -570,9 +569,10 @@ UUID LDAPAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const user->authentication.setServerName(ldap_server); LDAPSearchResultsList external_roles; - // TODO: mapped external roles are not available here. Implement? - grantRolesNoLock(*user, external_roles); + // TODO: mapped external roles are not available here. Without a password we can't authenticate and retrieve roles from LDAP server. + + assignRolesNoLock(*user, external_roles); return memory_storage.insert(user); } diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index 8ec7325f9c2..cce50fd03aa 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -65,8 +65,9 @@ private: void processRoleChange(const UUID & id, const AccessEntityPtr & entity); void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); - void grantRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const; - void updateRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const; + void assignRolesNoLock(User & user, const LDAPSearchResultsList & external_roles) const; + void assignRolesNoLock(User & user, const LDAPSearchResultsList & external_roles, const std::size_t external_roles_hash) const; + void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const; std::set mapExternalRolesNoLock(const LDAPSearchResultsList & external_roles) const; bool isPasswordCorrectLDAPNoLock(const User & user, const String & password, const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const; @@ -74,11 +75,12 @@ private: AccessControlManager * access_control_manager = nullptr; String ldap_server; LDAPSearchParamsList role_search_params; - std::set common_role_names; - mutable std::map> users_per_roles; // per-user roles: role name -> user names - mutable std::map granted_role_names; // currently granted roles: role id -> role name - mutable std::map granted_role_ids; // currently granted roles: role name -> role id - mutable std::map external_role_hashes; // user name -> LDAPSearchResultsList hash + std::set common_role_names; // role name that should be granted to all users at all times + mutable std::map external_role_hashes; // user name -> LDAPSearchResultsList hash (most recently retrieved and processed) + mutable std::map> users_per_roles; // role name -> user names (...it should be granted to; may but don't have to exist for common roles) + mutable std::map> roles_per_users; // user name -> role names (...that should be granted to it; may but don't have to include common roles) + mutable std::map granted_role_names; // (currently granted) role id -> its name + mutable std::map granted_role_ids; // (currently granted) role name -> its id ext::scope_guard role_change_subscription; mutable MemoryAccessStorage memory_storage; }; From 7ec8a73cc321640edc002b21e6263c5284cb89b5 Mon Sep 17 00:00:00 2001 From: alfredlu Date: Mon, 28 Dec 2020 10:38:16 +0800 Subject: [PATCH 
029/697] fix build --- src/Core/Settings.h | 2 +- src/Interpreters/QueryLog.h | 2 ++ src/Interpreters/executeQuery.cpp | 25 ++++--------------------- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fb2e13304d3..16e6a72ab82 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -324,7 +324,7 @@ class IColumn; M(Bool, log_profile_events, true, "Log query performance statistics into the query_log and query_thread_log.", 0) \ M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \ M(Bool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \ - M(String, log_comment, "comment: ", "Log comment format", 0) \ + M(String, log_comment, "", "Log comment into system.query_thread_log table. It can be set to arbitrary string no longer than max_query_size.", 0) \ M(LogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ M(Bool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \ M(Bool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \ diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 8b23a1f1ef9..2a90f78941d 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -64,6 +64,8 @@ struct QueryLogElement ClientInfo client_info; + String log_comment; + std::vector thread_ids; std::shared_ptr profile_counters; std::shared_ptr query_settings; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 8b3a56a5a97..2c4e8cd7cb3 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -160,16 +160,12 @@ static void logQuery(const String & query, const Context & context, bool interna const Settings & settings = context.getSettingsRef(); const auto & log_comment = settings.log_comment; - if (!log_comment.toString().empty()) - { - query = query + log_comment; - } LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}, using {} parser) {}", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), - (context.getSettingsRef().use_antlr_parser ? "new" : "old"), + (context.getSettingsRef().use_antlr_parser ? "new" : "old"), (!log_comment.empty() ? 
", comment: " + log_comment : std::string() : ""), joinLines(query)); if (client_info.client_trace_context.trace_id) @@ -178,22 +174,6 @@ static void logQuery(const String & query, const Context & context, bool interna "OpenTelemetry traceparent '{}'", client_info.client_trace_context.composeTraceparentHeader()); } - - QueryLogElement elem; - - elem.type = QueryLogElementType::QUERY_START; - elem.event_time = current_time_us / 1000000; - elem.event_time_microseconds = current_time_us; - elem.query_start_time = current_time_us / 1000000; - elem.query_start_time_microseconds = current_time_us; - - elem.current_database = context.getCurrentDatabase(); - elem.query = query; - elem.normalized_query_hash = normalizedQueryHash(query); - - elem.client_info = client_info; - if (auto query_log = context.getQueryLog()) - query_log->add(elem); } } @@ -650,6 +630,9 @@ static std::tuple executeQueryImpl( if (settings.log_query_settings) elem.query_settings = std::make_shared(context.getSettingsRef()); + if (!settings.log_comment.toString().empty() && settings.log_comment.toString().length() <= max_query_size) + elem.log_comment = settings.log_comment.toString(); + if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) { if (auto query_log = context.getQueryLog()) From 8e665955e76a2a5920b18a6b139ba21f4b3f1f08 Mon Sep 17 00:00:00 2001 From: TszkitLo40 Date: Mon, 28 Dec 2020 11:09:29 +0800 Subject: [PATCH 030/697] fix build. --- src/Interpreters/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 2c4e8cd7cb3..aa820eee429 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -159,13 +159,13 @@ static void logQuery(const String & query, const Context & context, bool interna const auto & current_user = client_info.current_user; const Settings & settings = context.getSettingsRef(); - const auto & log_comment = settings.log_comment; + const String & log_comment = settings.log_comment; LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}, using {} parser) {}", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), - (context.getSettingsRef().use_antlr_parser ? "new" : "old"), (!log_comment.empty() ? ", comment: " + log_comment : std::string() : ""), + (context.getSettingsRef().use_antlr_parser ? "new" : "old"), (!log_comment.empty() && log_comment.length() < context.getSettingsRef().max_query_size ? ", comment: " + log_comment : std::string()), joinLines(query)); if (client_info.client_trace_context.trace_id) From 9f2ae66eb5c3796c5838690fcf78b3ce24d4012f Mon Sep 17 00:00:00 2001 From: TszkitLo40 Date: Mon, 28 Dec 2020 20:37:09 +0800 Subject: [PATCH 031/697] fix style. 
--- src/Interpreters/executeQuery.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index aa820eee429..668188ee93d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -158,14 +158,15 @@ static void logQuery(const String & query, const Context & context, bool interna const auto & initial_query_id = client_info.initial_query_id; const auto & current_user = client_info.current_user; - const Settings & settings = context.getSettingsRef(); - const String & log_comment = settings.log_comment; - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}, using {} parser) {}", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), - (context.getSettingsRef().use_antlr_parser ? "new" : "old"), (!log_comment.empty() && log_comment.length() < context.getSettingsRef().max_query_size ? ", comment: " + log_comment : std::string()), + (context.getSettingsRef().use_antlr_parser ? "new" : "old"), + (!context.getSettingsRef().log_comment.empty() + && context.getSettingsRef().log_comment.length() <= context.getSettingsRef().max_query_size + ? ", comment: " + context.getSettingsRef().log_comment + : std::string()), joinLines(query)); if (client_info.client_trace_context.trace_id) From 55dac9715ae567f4c577fbb8ee5859a82b5500bb Mon Sep 17 00:00:00 2001 From: TszkitLo40 Date: Mon, 28 Dec 2020 20:57:27 +0800 Subject: [PATCH 032/697] bugfix --- src/Interpreters/executeQuery.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 668188ee93d..d7e174e0113 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -163,8 +163,8 @@ static void logQuery(const String & query, const Context & context, bool interna (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), (context.getSettingsRef().use_antlr_parser ? "new" : "old"), - (!context.getSettingsRef().log_comment.empty() - && context.getSettingsRef().log_comment.length() <= context.getSettingsRef().max_query_size + (!context.getSettingsRef().log_comment.toString().empty() + && context.getSettingsRef().log_comment.toString().length() <= context.getSettingsRef().max_query_size ? 
", comment: " + context.getSettingsRef().log_comment : std::string()), joinLines(query)); @@ -253,6 +253,9 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c elem.client_info = context.getClientInfo(); + if (!settings.log_comment.toString().empty() && settings.log_comment.toString().length() <= max_query_size) + elem.log_comment = settings.log_comment.toString(); + if (settings.calculate_text_stack_trace) setExceptionStackTrace(elem); logException(context, elem); From f7d7880ad54fd22e7c80e73977fb26c7c45aa666 Mon Sep 17 00:00:00 2001 From: TszkitLo40 Date: Tue, 29 Dec 2020 10:13:27 +0800 Subject: [PATCH 033/697] fix build --- src/Core/Settings.h | 2 +- src/Interpreters/executeQuery.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 16e6a72ab82..00e5651ec97 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -324,7 +324,7 @@ class IColumn; M(Bool, log_profile_events, true, "Log query performance statistics into the query_log and query_thread_log.", 0) \ M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \ M(Bool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \ - M(String, log_comment, "", "Log comment into system.query_thread_log table. It can be set to arbitrary string no longer than max_query_size.", 0) \ + M(String, log_comment, "", "Log comment into system.query_log table. It can be set to arbitrary string no longer than max_query_size.", 0) \ M(LogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ M(Bool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \ M(Bool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index d7e174e0113..8ed77de6ac6 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -165,7 +165,7 @@ static void logQuery(const String & query, const Context & context, bool interna (context.getSettingsRef().use_antlr_parser ? "new" : "old"), (!context.getSettingsRef().log_comment.toString().empty() && context.getSettingsRef().log_comment.toString().length() <= context.getSettingsRef().max_query_size - ? ", comment: " + context.getSettingsRef().log_comment + ? 
", comment: " + context.getSettingsRef().log_comment.toString() : std::string()), joinLines(query)); @@ -253,7 +253,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c elem.client_info = context.getClientInfo(); - if (!settings.log_comment.toString().empty() && settings.log_comment.toString().length() <= max_query_size) + if (!settings.log_comment.toString().empty() && settings.log_comment.toString().length() <= settings.max_query_size) elem.log_comment = settings.log_comment.toString(); if (settings.calculate_text_stack_trace) From a8f1786d952482e0e4224537ad27e6cf8bd92ae2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 29 Dec 2020 18:19:11 +0300 Subject: [PATCH 034/697] fix TTL with GROUP BY --- src/DataStreams/TTLColumnAlgorithm.cpp | 1 - src/Storages/TTLDescription.cpp | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp index 4747a605e3b..afab3af62a7 100644 --- a/src/DataStreams/TTLColumnAlgorithm.cpp +++ b/src/DataStreams/TTLColumnAlgorithm.cpp @@ -26,7 +26,6 @@ void TTLColumnAlgorithm::execute(Block & block) if (!block) return; - /// If we read not all table columns. E.g. while mutation. if (!block.has(column_name)) return; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index d8731dd4ab3..f0c936b10c2 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -211,9 +211,12 @@ TTLDescription TTLDescription::getTTLFromAST( const auto & primary_key_expressions = primary_key.expression_list_ast->children; for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i) { - ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone()); - aggregations.emplace_back(pk_columns[i], std::move(expr)); - aggregation_columns_set.insert(pk_columns[i]); + if (!aggregation_columns_set.count(pk_columns[i])) + { + ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone()); + aggregations.emplace_back(pk_columns[i], std::move(expr)); + aggregation_columns_set.insert(pk_columns[i]); + } } for (const auto & column : columns.getOrdinary()) From 5875b934f8f5a6e38892c908d2571f326c4b7ba7 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 16:33:47 +0300 Subject: [PATCH 035/697] Edited original article --- .../utilities/clickhouse-benchmark.md | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 9c90ba7f028..e3ebef76dae 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -7,7 +7,7 @@ toc_title: clickhouse-benchmark Connects to a ClickHouse server and repeatedly sends specified queries. -Syntax: +**Syntax** ``` bash $ clickhouse-benchmark --query ["single query"] [keys] @@ -28,11 +28,11 @@ $ clickhouse-benchmark [keys] <<< "single query" If you want to send a set of queries, create a text file and place each query on the individual string in this file. For example: ``` sql -SELECT * FROM system.numbers LIMIT 10000000 -SELECT 1 +SELECT * FROM system.numbers LIMIT 10000000; +SELECT 1; ``` -Then pass this file to a standard input of `clickhouse-benchmark`. 
+Then pass this file to a standard input of `clickhouse-benchmark`: ``` bash clickhouse-benchmark [keys] < queries_file @@ -40,23 +40,23 @@ clickhouse-benchmark [keys] < queries_file ## Keys {#clickhouse-benchmark-keys} -- `--query=WORD` - Query to execute. If this parameter is not passed clickhouse-benchmark will read queries from standard input. -- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. -- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. +- `--query=WORD` - Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. +- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: `1`. +- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set `0`). Default value: `1`. - `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. -- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). -- `-r`, `--randomize` — Random order of queries execution if there is more then one input query. -- `-s`, `--secure` — Using TLS connection. -- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). -- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) test to determine whether the two distributions aren’t different with the selected level of confidence. +- `-p N`, `--port=N` — Server port. Default value: `9000`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. +- `-i N`, `--iterations=N` — Total number of queries. Default value: `0` (repeat forever). +- `-r`, `--randomize` — Random order of queries execution if there is more than one input query. +- `-s`, `--secure` — Using `TLS` connection. +- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: `0` (time limit disabled). +- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. +- `--json=FILEPATH` — `JSON` output. 
When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. - `--user=USERNAME` — ClickHouse user name. Default value: `default`. - `--password=PSWD` — ClickHouse user password. Default value: empty string. - `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. +- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. - `--help` — Shows the help message. If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. @@ -96,11 +96,11 @@ In the report you can find: - Endpoint of ClickHouse server. - Number of processed queries. - - QPS: QPS: How many queries server performed per second during a period specified in the `--delay` argument. - - RPS: How many rows server read per second during a period specified in the `--delay` argument. - - MiB/s: How many mebibytes server read per second during a period specified in the `--delay` argument. - - result RPS: How many rows placed by server to the result of a query per second during a period specified in the `--delay` argument. - - result MiB/s. How many mebibytes placed by server to the result of a query per second during a period specified in the `--delay` argument. + - QPS: How many queries the server performed per second during a period specified in the `--delay` argument. + - RPS: How many rows the server reads per second during a period specified in the `--delay` argument. + - MiB/s: How many mebibytes the server reads per second during a period specified in the `--delay` argument. + - result RPS: How many rows placed by the server to the result of a query per second during a period specified in the `--delay` argument. + - result MiB/s. How many mebibytes placed by the server to the result of a query per second during a period specified in the `--delay` argument. - Percentiles of queries execution time. @@ -159,3 +159,5 @@ localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, resu 99.900% 0.172 sec. 99.990% 0.172 sec. ``` + +[Original article](https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark.md) From b9694e42cf55b61a132904df1e9b495f8d4f8dec Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 16:34:10 +0300 Subject: [PATCH 036/697] Added Russian translation --- .../utilities/clickhouse-benchmark.md | 164 +++++++++++++++++- 1 file changed, 163 insertions(+), 1 deletion(-) diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 3695c9fbdd3..5929dc0c756 120000 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -1 +1,163 @@ -../../../en/operations/utilities/clickhouse-benchmark.md \ No newline at end of file +--- +toc_priority: 61 +toc_title: clickhouse-benchmark +--- + +# clickhouse-benchmark {#clickhouse-benchmark} + +Устанавливает соединение с сервером ClickHouse и неоднократно посылает указанные запросы. 
+ +**Синтаксис** + +``` bash +$ clickhouse-benchmark --query ["single query"] [keys] +``` + +или + +``` bash +$ echo "single query" | clickhouse-benchmark [keys] +``` + +или + +``` bash +$ clickhouse-benchmark [keys] <<< "single query" +``` + +Если нужно послать набор запросов, создайте текстовый файл и расположите каждый запрос на отдельной строке в файле. Например: + +``` sql +SELECT * FROM system.numbers LIMIT 10000000; +SELECT 1; +``` + +После этого передайте этот файл в стандартный ввод `clickhouse-benchmark`: + +``` bash +clickhouse-benchmark [keys] < queries_file +``` + +## Ключи {#clickhouse-benchmark-keys} + +- `--query=WORD` - запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать считывать запросы из стандартного ввода. +- `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. +- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями. (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. +- `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. +- `-p N`, `--port=N` — порт сервера. Значение по умолчанию: `9000`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. +- `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: `0` (вечно будет повторяться). +- `-r`, `--randomize` — случайный порядок выполнения запросов при наличии больше чем одного входного запроса. +- `-s`, `--secure` — использование `TLS` соединения. +- `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: `0` (лимит отключен). +- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: `5`. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. +- `--cumulative` — выводит совокупность данных, а не данные за интервал. +- `--database=DATABASE_NAME` — имя базы данных ClickHouse. Значение по умолчанию: `default`. +- `--json=FILEPATH` — формат вывода `JSON`. Когда этот ключ указан, `clickhouse-benchmark` выводит сообщение в указанный JSON-файл. +- `--user=USERNAME` — имя пользователя ClickHouse. Значение по умолчанию: `default`. +- `--password=PSWD` — пароль пользователя ClickHouse. Значение по умолчанию: пустая строка. +- `--stacktrace` — вывод трассировки стека. Когда этот ключ указан, `clickhouse-bencmark` выводит трассировку стека исключений. +- `--stage=WORD` — стадия обработки запроса на сервере. ClickHouse останавливает обработку запроса и возвращает ответ `clickhouse-benchmark` на заданной стадии. Возможные значения: `complete`, `fetch_columns`, `with_mergeable_state`. Значение по умолчанию: `complete`. +- `--help` — показывает help-сообщение. + +Если нужно применить какие-нибудь [настройки](../../operations/settings/index.md) для запросов, их можно передать как ключ `--= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. 
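Beyond the single-query example above, the `-i`, `-r` and comparison-mode keys are easiest to see with a slightly larger queries file; the queries below are generic illustrations rather than part of the documentation:

``` sql
SELECT count() FROM numbers(50000000) WHERE number % 7 = 3;
SELECT sum(number) FROM numbers(100000000);
SELECT uniqExact(number % 1000) FROM numbers(10000000);
SELECT quantile(0.9)(number) FROM numbers(10000000);
```

Each line is sent as a separate query, so with `--randomize` the queries are executed in random order, and with two `--host`/`--port` pairs each query goes to a randomly chosen server.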
+ +## Вывод {#clickhouse-benchmark-output} + +По умолчанию, `clickhouse-benchmark` выводит сообщение для каждого `--delay` интервала. + +Пример сообщения: + +``` text +Queries executed: 10. + +localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675. + +0.000% 0.145 sec. +10.000% 0.146 sec. +20.000% 0.146 sec. +30.000% 0.146 sec. +40.000% 0.147 sec. +50.000% 0.148 sec. +60.000% 0.148 sec. +70.000% 0.148 sec. +80.000% 0.149 sec. +90.000% 0.150 sec. +95.000% 0.150 sec. +99.000% 0.150 sec. +99.900% 0.150 sec. +99.990% 0.150 sec. +``` + +В сообщении можно найти: + +- Количество запросов в поле `Queries executed:`. + +- Строка статуса, содержащая (в данном порядке): + + - Конечная точка сервера ClickHouse. + - Число обработанных запросов. + - QPS: количество запросов, выполняемых сервером за секунду за в течение `--delay` интервала. + - RPS: количество строк, читаемых сервером за секунду за в течение `--delay` интервала. + - MiB/s: количество Мебибайтов, считываемых сервером за секунду в течение `--delay` интервала. + - result RPS: количество столбцов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. + - result MiB/s. количество Мебибайтов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. + +- Процентили времени выполнения запросов. + +## Сравнительный режим {#clickhouse-benchmark-comparison-mode} + +`clickhouse-benchmark` может сравнивать производительность двух работающих серверов ClickHouse. + +Для использования сравнительного режима укажите конечную точку двух серверов двумя парами ключей `--host`, `--port`. Связь ключей соответствует позициям в списке аргументов, первый `--host` соответствует первому `--port` и так далее. `clickhouse-benchmark` устанавливает соединение с обоими серверами и отсылает запросы. Каждый запрос адресован случайно выбранному серверу. Результаты выводятся отдельно для каждого сервера. + +## Пример {#clickhouse-benchmark-example} + +``` bash +$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 +``` + +``` text +Loaded 1 queries. + +Queries executed: 6. + +localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. + +0.000% 0.159 sec. +10.000% 0.159 sec. +20.000% 0.159 sec. +30.000% 0.160 sec. +40.000% 0.160 sec. +50.000% 0.162 sec. +60.000% 0.164 sec. +70.000% 0.165 sec. +80.000% 0.166 sec. +90.000% 0.166 sec. +95.000% 0.167 sec. +99.000% 0.167 sec. +99.900% 0.167 sec. +99.990% 0.167 sec. + + + +Queries executed: 10. + +localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. + +0.000% 0.159 sec. +10.000% 0.159 sec. +20.000% 0.160 sec. +30.000% 0.163 sec. +40.000% 0.164 sec. +50.000% 0.165 sec. +60.000% 0.166 sec. +70.000% 0.166 sec. +80.000% 0.167 sec. +90.000% 0.167 sec. +95.000% 0.170 sec. +99.000% 0.172 sec. +99.900% 0.172 sec. +99.990% 0.172 sec. 
+``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/utilities/clickhouse-benchmark.md) From c7b9cf73488da560dbd9e878ab6473f72edc8625 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 17:12:10 +0300 Subject: [PATCH 037/697] Minor fixes --- docs/en/operations/utilities/clickhouse-benchmark.md | 2 +- docs/ru/operations/utilities/clickhouse-benchmark.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index e3ebef76dae..b8a29ec871e 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -40,7 +40,7 @@ clickhouse-benchmark [keys] < queries_file ## Keys {#clickhouse-benchmark-keys} -- `--query=WORD` - Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. +- `--query=WORD` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: `1`. - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set `0`). Default value: `1`. - `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 5929dc0c756..4fbc5b230ec 120000 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -40,7 +40,7 @@ clickhouse-benchmark [keys] < queries_file ## Ключи {#clickhouse-benchmark-keys} -- `--query=WORD` - запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать считывать запросы из стандартного ввода. +- `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать считывать запросы из стандартного ввода. - `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. - `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями. (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. - `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. From ab7e3e87cfc267e6d49f9be0e658766f61a6fd5b Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 18:09:24 +0300 Subject: [PATCH 038/697] Fixed typos --- docs/ru/operations/utilities/clickhouse-benchmark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 4fbc5b230ec..e3e567f3978 120000 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -40,7 +40,7 @@ clickhouse-benchmark [keys] < queries_file ## Ключи {#clickhouse-benchmark-keys} -- `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать считывать запросы из стандартного ввода. +- `--query=WORD` — запрос для исполнения. 
Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. - `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. - `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями. (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. - `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. From 9da4e2509247e47ca3891b6c90af89ad79a0c810 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 18:27:35 +0300 Subject: [PATCH 039/697] Resolving symling issue --- .../utilities/clickhouse-benchmark.md | 163 ------------------ 1 file changed, 163 deletions(-) delete mode 120000 docs/ru/operations/utilities/clickhouse-benchmark.md diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md deleted file mode 120000 index e3e567f3978..00000000000 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ /dev/null @@ -1,163 +0,0 @@ ---- -toc_priority: 61 -toc_title: clickhouse-benchmark ---- - -# clickhouse-benchmark {#clickhouse-benchmark} - -Устанавливает соединение с сервером ClickHouse и неоднократно посылает указанные запросы. - -**Синтаксис** - -``` bash -$ clickhouse-benchmark --query ["single query"] [keys] -``` - -или - -``` bash -$ echo "single query" | clickhouse-benchmark [keys] -``` - -или - -``` bash -$ clickhouse-benchmark [keys] <<< "single query" -``` - -Если нужно послать набор запросов, создайте текстовый файл и расположите каждый запрос на отдельной строке в файле. Например: - -``` sql -SELECT * FROM system.numbers LIMIT 10000000; -SELECT 1; -``` - -После этого передайте этот файл в стандартный ввод `clickhouse-benchmark`: - -``` bash -clickhouse-benchmark [keys] < queries_file -``` - -## Ключи {#clickhouse-benchmark-keys} - -- `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. -- `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. -- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями. (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. -- `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. -- `-p N`, `--port=N` — порт сервера. Значение по умолчанию: `9000`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. -- `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: `0` (вечно будет повторяться). -- `-r`, `--randomize` — случайный порядок выполнения запросов при наличии больше чем одного входного запроса. -- `-s`, `--secure` — использование `TLS` соединения. -- `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: `0` (лимит отключен). -- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: `5`. 
В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. -- `--cumulative` — выводит совокупность данных, а не данные за интервал. -- `--database=DATABASE_NAME` — имя базы данных ClickHouse. Значение по умолчанию: `default`. -- `--json=FILEPATH` — формат вывода `JSON`. Когда этот ключ указан, `clickhouse-benchmark` выводит сообщение в указанный JSON-файл. -- `--user=USERNAME` — имя пользователя ClickHouse. Значение по умолчанию: `default`. -- `--password=PSWD` — пароль пользователя ClickHouse. Значение по умолчанию: пустая строка. -- `--stacktrace` — вывод трассировки стека. Когда этот ключ указан, `clickhouse-bencmark` выводит трассировку стека исключений. -- `--stage=WORD` — стадия обработки запроса на сервере. ClickHouse останавливает обработку запроса и возвращает ответ `clickhouse-benchmark` на заданной стадии. Возможные значения: `complete`, `fetch_columns`, `with_mergeable_state`. Значение по умолчанию: `complete`. -- `--help` — показывает help-сообщение. - -Если нужно применить какие-нибудь [настройки](../../operations/settings/index.md) для запросов, их можно передать как ключ `--= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. - -## Вывод {#clickhouse-benchmark-output} - -По умолчанию, `clickhouse-benchmark` выводит сообщение для каждого `--delay` интервала. - -Пример сообщения: - -``` text -Queries executed: 10. - -localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675. - -0.000% 0.145 sec. -10.000% 0.146 sec. -20.000% 0.146 sec. -30.000% 0.146 sec. -40.000% 0.147 sec. -50.000% 0.148 sec. -60.000% 0.148 sec. -70.000% 0.148 sec. -80.000% 0.149 sec. -90.000% 0.150 sec. -95.000% 0.150 sec. -99.000% 0.150 sec. -99.900% 0.150 sec. -99.990% 0.150 sec. -``` - -В сообщении можно найти: - -- Количество запросов в поле `Queries executed:`. - -- Строка статуса, содержащая (в данном порядке): - - - Конечная точка сервера ClickHouse. - - Число обработанных запросов. - - QPS: количество запросов, выполняемых сервером за секунду за в течение `--delay` интервала. - - RPS: количество строк, читаемых сервером за секунду за в течение `--delay` интервала. - - MiB/s: количество Мебибайтов, считываемых сервером за секунду в течение `--delay` интервала. - - result RPS: количество столбцов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. - - result MiB/s. количество Мебибайтов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. - -- Процентили времени выполнения запросов. - -## Сравнительный режим {#clickhouse-benchmark-comparison-mode} - -`clickhouse-benchmark` может сравнивать производительность двух работающих серверов ClickHouse. - -Для использования сравнительного режима укажите конечную точку двух серверов двумя парами ключей `--host`, `--port`. Связь ключей соответствует позициям в списке аргументов, первый `--host` соответствует первому `--port` и так далее. `clickhouse-benchmark` устанавливает соединение с обоими серверами и отсылает запросы. Каждый запрос адресован случайно выбранному серверу. Результаты выводятся отдельно для каждого сервера. 
- -## Пример {#clickhouse-benchmark-example} - -``` bash -$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 -``` - -``` text -Loaded 1 queries. - -Queries executed: 6. - -localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. - -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.159 sec. -30.000% 0.160 sec. -40.000% 0.160 sec. -50.000% 0.162 sec. -60.000% 0.164 sec. -70.000% 0.165 sec. -80.000% 0.166 sec. -90.000% 0.166 sec. -95.000% 0.167 sec. -99.000% 0.167 sec. -99.900% 0.167 sec. -99.990% 0.167 sec. - - - -Queries executed: 10. - -localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. - -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.160 sec. -30.000% 0.163 sec. -40.000% 0.164 sec. -50.000% 0.165 sec. -60.000% 0.166 sec. -70.000% 0.166 sec. -80.000% 0.167 sec. -90.000% 0.167 sec. -95.000% 0.170 sec. -99.000% 0.172 sec. -99.900% 0.172 sec. -99.990% 0.172 sec. -``` - -[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/utilities/clickhouse-benchmark.md) From 43de7688ef0262d17ab0e09262bc6e95866ddf9f Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 18:28:34 +0300 Subject: [PATCH 040/697] Resolving symlink issue --- .../utilities/clickhouse-benchmark.md | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 docs/ru/operations/utilities/clickhouse-benchmark.md diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md new file mode 100644 index 00000000000..a72a7e9f0a1 --- /dev/null +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -0,0 +1,162 @@ +toc_priority: 61 +toc_title: clickhouse-benchmark +--- + +# clickhouse-benchmark {#clickhouse-benchmark} + +Устанавливает соединение с сервером ClickHouse и неоднократно посылает указанные запросы. + +**Синтаксис** + +``` bash +$ clickhouse-benchmark --query ["single query"] [keys] +``` + +или + +``` bash +$ echo "single query" | clickhouse-benchmark [keys] +``` + +или + +``` bash +$ clickhouse-benchmark [keys] <<< "single query" +``` + +Если нужно послать набор запросов, создайте текстовый файл и расположите каждый запрос на отдельной строке в файле. Например: + +``` sql +SELECT * FROM system.numbers LIMIT 10000000; +SELECT 1; +``` + +После этого передайте этот файл в стандартный ввод `clickhouse-benchmark`: + +``` bash +clickhouse-benchmark [keys] < queries_file +``` + +## Ключи {#clickhouse-benchmark-keys} + +- `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. +- `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. +- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями. (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. +- `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. +- `-p N`, `--port=N` — порт сервера. Значение по умолчанию: `9000`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. +- `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: `0` (вечно будет повторяться). 
+- `-r`, `--randomize` — случайный порядок выполнения запросов при наличии больше чем одного входного запроса. +- `-s`, `--secure` — использование `TLS` соединения. +- `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: `0` (лимит отключен). +- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: `5`. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. +- `--cumulative` — выводит совокупность данных, а не данные за интервал. +- `--database=DATABASE_NAME` — имя базы данных ClickHouse. Значение по умолчанию: `default`. +- `--json=FILEPATH` — формат вывода `JSON`. Когда этот ключ указан, `clickhouse-benchmark` выводит сообщение в указанный JSON-файл. +- `--user=USERNAME` — имя пользователя ClickHouse. Значение по умолчанию: `default`. +- `--password=PSWD` — пароль пользователя ClickHouse. Значение по умолчанию: пустая строка. +- `--stacktrace` — вывод трассировки стека. Когда этот ключ указан, `clickhouse-bencmark` выводит трассировку стека исключений. +- `--stage=WORD` — стадия обработки запроса на сервере. ClickHouse останавливает обработку запроса и возвращает ответ `clickhouse-benchmark` на заданной стадии. Возможные значения: `complete`, `fetch_columns`, `with_mergeable_state`. Значение по умолчанию: `complete`. +- `--help` — показывает help-сообщение. + +Если нужно применить какие-нибудь [настройки](../../operations/settings/index.md) для запросов, их можно передать как ключ `--= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. + +## Вывод {#clickhouse-benchmark-output} + +По умолчанию, `clickhouse-benchmark` выводит сообщение для каждого `--delay` интервала. + +Пример сообщения: + +``` text +Queries executed: 10. + +localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675. + +0.000% 0.145 sec. +10.000% 0.146 sec. +20.000% 0.146 sec. +30.000% 0.146 sec. +40.000% 0.147 sec. +50.000% 0.148 sec. +60.000% 0.148 sec. +70.000% 0.148 sec. +80.000% 0.149 sec. +90.000% 0.150 sec. +95.000% 0.150 sec. +99.000% 0.150 sec. +99.900% 0.150 sec. +99.990% 0.150 sec. +``` + +В сообщении можно найти: + +- Количество запросов в поле `Queries executed:`. + +- Строка статуса, содержащая (в данном порядке): + + - Конечная точка сервера ClickHouse. + - Число обработанных запросов. + - QPS: количество запросов, выполняемых сервером за секунду за в течение `--delay` интервала. + - RPS: количество строк, читаемых сервером за секунду за в течение `--delay` интервала. + - MiB/s: количество Мебибайтов, считываемых сервером за секунду в течение `--delay` интервала. + - result RPS: количество столбцов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. + - result MiB/s. количество Мебибайтов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. + +- Процентили времени выполнения запросов. + +## Сравнительный режим {#clickhouse-benchmark-comparison-mode} + +`clickhouse-benchmark` может сравнивать производительность двух работающих серверов ClickHouse. 
+ +Для использования сравнительного режима укажите конечную точку двух серверов двумя парами ключей `--host`, `--port`. Связь ключей соответствует позициям в списке аргументов, первый `--host` соответствует первому `--port` и так далее. `clickhouse-benchmark` устанавливает соединение с обоими серверами и отсылает запросы. Каждый запрос адресован случайно выбранному серверу. Результаты выводятся отдельно для каждого сервера. + +## Пример {#clickhouse-benchmark-example} + +``` bash +$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 +``` + +``` text +Loaded 1 queries. + +Queries executed: 6. + +localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. + +0.000% 0.159 sec. +10.000% 0.159 sec. +20.000% 0.159 sec. +30.000% 0.160 sec. +40.000% 0.160 sec. +50.000% 0.162 sec. +60.000% 0.164 sec. +70.000% 0.165 sec. +80.000% 0.166 sec. +90.000% 0.166 sec. +95.000% 0.167 sec. +99.000% 0.167 sec. +99.900% 0.167 sec. +99.990% 0.167 sec. + + + +Queries executed: 10. + +localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. + +0.000% 0.159 sec. +10.000% 0.159 sec. +20.000% 0.160 sec. +30.000% 0.163 sec. +40.000% 0.164 sec. +50.000% 0.165 sec. +60.000% 0.166 sec. +70.000% 0.166 sec. +80.000% 0.167 sec. +90.000% 0.167 sec. +95.000% 0.170 sec. +99.000% 0.172 sec. +99.900% 0.172 sec. +99.990% 0.172 sec. +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/utilities/clickhouse-benchmark.md) From 3ad314af4d073c18c4a0ed092e866a158c51fcdc Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 19:15:01 +0300 Subject: [PATCH 041/697] fixed a mistake --- docs/ru/operations/utilities/clickhouse-benchmark.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index a72a7e9f0a1..e3e567f3978 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -1,3 +1,4 @@ +--- toc_priority: 61 toc_title: clickhouse-benchmark --- From 0dcf69ea0490092aa880280ec67135d095125aef Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Dec 2020 20:36:06 +0300 Subject: [PATCH 042/697] Minor improvements --- .../operations/utilities/clickhouse-benchmark.md | 2 +- .../operations/utilities/clickhouse-benchmark.md | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index b8a29ec871e..f7ea0aa1302 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -35,7 +35,7 @@ SELECT 1; Then pass this file to a standard input of `clickhouse-benchmark`: ``` bash -clickhouse-benchmark [keys] < queries_file +clickhouse-benchmark [keys] < queries_file; ``` ## Keys {#clickhouse-benchmark-keys} diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index e3e567f3978..218e41c6a72 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -35,18 +35,18 @@ SELECT 1; После этого передайте этот файл в стандартный ввод `clickhouse-benchmark`: ``` bash -clickhouse-benchmark [keys] < queries_file +clickhouse-benchmark [keys] < queries_file; ``` ## Ключи 
{#clickhouse-benchmark-keys} - `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. - `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. -- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями. (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. +- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. - `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. - `-p N`, `--port=N` — порт сервера. Значение по умолчанию: `9000`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. - `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: `0` (вечно будет повторяться). -- `-r`, `--randomize` — случайный порядок выполнения запросов при наличии больше чем одного входного запроса. +- `-r`, `--randomize` — случайный порядок выполнения запросов при наличии более одного входного запроса. - `-s`, `--secure` — использование `TLS` соединения. - `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: `0` (лимит отключен). - `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: `5`. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. @@ -92,12 +92,12 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul - Количество запросов в поле `Queries executed:`. -- Строка статуса, содержащая (в данном порядке): +- Строка статуса, содержащая (в таком же порядке): - Конечная точка сервера ClickHouse. - Число обработанных запросов. - - QPS: количество запросов, выполняемых сервером за секунду за в течение `--delay` интервала. - - RPS: количество строк, читаемых сервером за секунду за в течение `--delay` интервала. + - QPS: количество запросов, выполняемых сервером за секунду в течение `--delay` интервала. + - RPS: количество строк, читаемых сервером за секунду в течение `--delay` интервала. - MiB/s: количество Мебибайтов, считываемых сервером за секунду в течение `--delay` интервала. - result RPS: количество столбцов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. - result MiB/s. количество Мебибайтов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. @@ -108,7 +108,7 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul `clickhouse-benchmark` может сравнивать производительность двух работающих серверов ClickHouse. -Для использования сравнительного режима укажите конечную точку двух серверов двумя парами ключей `--host`, `--port`. Связь ключей соответствует позициям в списке аргументов, первый `--host` соответствует первому `--port` и так далее. `clickhouse-benchmark` устанавливает соединение с обоими серверами и отсылает запросы. 
Каждый запрос адресован случайно выбранному серверу. Результаты выводятся отдельно для каждого сервера. +Для использования сравнительного режима укажите конечную точку двух серверов двумя парами ключей `--host`, `--port`. Связь ключей соответствует позициям в списке аргументов: первый `--host` соответствует первому `--port` и так далее. `clickhouse-benchmark` устанавливает соединение с обоими серверами и отсылает запросы. Каждый запрос адресован случайно выбранному серверу. Результаты выводятся отдельно для каждого сервера. ## Пример {#clickhouse-benchmark-example} From 8893fbcf8e9b1bbe13035209e835b55eaf52c7da Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 6 Jan 2021 07:40:47 +0400 Subject: [PATCH 043/697] Rename {username} to {user_name} Add caching/checking of search_params Adjust comments/doc Use special authentication logic from ExternalAuthenticators::checkLDAPCredentials --- programs/server/config.xml | 19 ++++--- src/Access/Authentication.cpp | 13 ----- src/Access/ExternalAuthenticators.cpp | 55 ++++++++++++++++--- src/Access/ExternalAuthenticators.h | 4 +- src/Access/LDAPAccessStorage.cpp | 10 ++-- src/Access/LDAPAccessStorage.h | 3 +- src/Access/LDAPClient.cpp | 12 ++-- src/Access/LDAPParams.h | 20 ++++--- .../external_user_directory/tests/common.py | 2 +- 9 files changed, 89 insertions(+), 49 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 98b1801372b..09a53a6589e 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -322,7 +322,7 @@ host - LDAP server hostname or IP, this parameter is mandatory and cannot be empty. port - LDAP server port, default is 636 if enable_tls is set to true, 389 otherwise. bind_dn - template used to construct the DN to bind to. - The resulting DN will be constructed by replacing all '{username}' substrings of the template with the actual + The resulting DN will be constructed by replacing all '{user_name}' substrings of the template with the actual user name during each authentication attempt. verification_cooldown - a period of time, in seconds, after a successful bind attempt, during which a user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server. @@ -344,7 +344,7 @@ localhost 636 - uid={username},ou=users,dc=example,dc=com + uid={user_name},ou=users,dc=example,dc=com 300 yes tls1.2 @@ -377,20 +377,23 @@ If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication. role_mapping - section with LDAP search parameters and mapping rules. - The list of strings (values of attributes) returned by the search will be transformed and the resulting strings - will be treated as local role names and assigned to the user. + When a user authenticates, while still bound to LDAP, an LDAP search is performed using search_filter and the + name of the logged in user. For each entry found during that search, the value of the specified attribute is + extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the + value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by + CREATE ROLE command. There can be multiple 'role_mapping' sections defined inside the same 'ldap' section. All of them will be applied. base_dn - template used to construct the base DN for the LDAP search. 
- The resulting DN will be constructed by replacing all '{username}' and '{bind_dn}' substrings + The resulting DN will be constructed by replacing all '{user_name}' and '{bind_dn}' substrings of the template with the actual user name and bind DN during each LDAP search. - attribute - attribute name whose values will be returned by the LDAP search. scope - scope of the LDAP search. Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default). search_filter - template used to construct the search filter for the LDAP search. - The resulting filter will be constructed by replacing all '{username}', '{bind_dn}', and '{base_dn}' + The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', and '{base_dn}' substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. Note, that the special characters must be escaped properly in XML. + attribute - attribute name whose values will be returned by the LDAP search. prefix - prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated as local role names. Empty, by default. @@ -403,9 +406,9 @@ ou=groups,dc=example,dc=com - cn subtree (&(objectClass=groupOfNames)(member={bind_dn})) + cn clickhouse_ diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 3b67563f6eb..19c40c068b4 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -88,17 +88,4 @@ bool Authentication::isCorrectPassword(const String & user_, const String & pass throw Exception("Cannot check if the password is correct for authentication type " + toString(type), ErrorCodes::NOT_IMPLEMENTED); } -bool Authentication::isCorrectPasswordLDAP(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators, const LDAPSearchParamsList * search_params, LDAPSearchResultsList * search_results) const -{ - if (type != LDAP_SERVER) - throw Exception("Cannot check if the password is correct using LDAP logic for authentication type " + toString(type), ErrorCodes::BAD_ARGUMENTS); - - auto ldap_server_params = external_authenticators.getLDAPServerParams(server_name); - ldap_server_params.user = user_; - ldap_server_params.password = password_; - - LDAPSimpleAuthClient ldap_client(ldap_server_params); - return ldap_client.authenticate(search_params, search_results); -} - } diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index e13341ff3f9..6f66f4303e1 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -63,7 +63,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str { const auto auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); const auto auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); - params.bind_dn = auth_dn_prefix + "{username}" + auth_dn_suffix; + params.bind_dn = auth_dn_prefix + "{user_name}" + auth_dn_suffix; } if (has_verification_cooldown) @@ -177,7 +177,8 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } } -bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const String & user_name, const String & password) const +bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const String & user_name, const String & password, + const 
LDAPSearchParamsList * search_params, LDAPSearchResultsList * search_results) const { std::optional params; std::size_t params_hash = 0; @@ -193,7 +194,15 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const S params = pit->second; params->user = user_name; params->password = password; - params_hash = params->getCoreHash(); + + params->combineCoreHash(params_hash); + if (search_params) + { + for (const auto & params_instance : *search_params) + { + params_instance.combineHash(params_hash); + } + } // Check the cache, but only if the caching is enabled at all. if (params->verification_cooldown > std::chrono::seconds{0}) @@ -217,9 +226,19 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const S // Check if we can safely "reuse" the result of the previous successful password verification. entry.last_successful_params_hash == params_hash && last_check_period >= std::chrono::seconds{0} && - last_check_period <= params->verification_cooldown + last_check_period <= params->verification_cooldown && + + // Ensure that search_params are compatible. + ( + search_params == nullptr ? + entry.last_successful_search_results.empty() : + search_params->size() == entry.last_successful_search_results.size() + ) ) { + if (search_results) + *search_results = entry.last_successful_search_results; + return true; } @@ -236,7 +255,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const S } LDAPSimpleAuthClient client(params.value()); - const auto result = client.check(); + const auto result = client.authenticate(search_params, search_results); const auto current_check_timestamp = std::chrono::steady_clock::now(); // Update the cache, but only if this is the latest check and the server is still configured in a compatible way. @@ -253,8 +272,18 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const S new_params.user = user_name; new_params.password = password; + std::size_t new_params_hash = 0; + new_params.combineCoreHash(new_params_hash); + if (search_params) + { + for (const auto & params_instance : *search_params) + { + params_instance.combineHash(new_params_hash); + } + } + // If the critical server params have changed while we were checking the password, we discard the current result. - if (params_hash != new_params.getCoreHash()) + if (params_hash != new_params_hash) return false; auto & entry = ldap_server_caches[server][user_name]; @@ -262,8 +291,20 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const S { entry.last_successful_params_hash = params_hash; entry.last_successful_authentication_timestamp = current_check_timestamp; + + if (search_results) + entry.last_successful_search_results = *search_results; + else + entry.last_successful_search_results.clear(); } - else if (entry.last_successful_params_hash != params_hash) + else if ( + entry.last_successful_params_hash != params_hash || + ( + search_params == nullptr ? + !entry.last_successful_search_results.empty() : + search_params->size() != entry.last_successful_search_results.size() + ) + ) { // Somehow a newer check with different params/password succeeded, so the current result is obsolete and we discard it. 
return false; diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index fa618c92b3f..abcc8e8d10d 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -28,13 +28,15 @@ class ExternalAuthenticators public: void reset(); void setConfiguration(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log); - bool checkLDAPCredentials(const String & server, const String & user_name, const String & password) const; + bool checkLDAPCredentials(const String & server, const String & user_name, const String & password, + const LDAPSearchParamsList * search_params = nullptr, LDAPSearchResultsList * search_results = nullptr) const; private: struct LDAPCacheEntry { std::size_t last_successful_params_hash = 0; std::chrono::steady_clock::time_point last_successful_authentication_timestamp; + LDAPSearchResultsList last_successful_search_results; }; using LDAPServerCache = std::unordered_map; // user name -> cache entry diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index a787c704999..2602422a59a 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -364,9 +365,10 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const LDAPSearchResul } -bool LDAPAccessStorage::isPasswordCorrectLDAPNoLock(const User & user, const String & password, const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const +bool LDAPAccessStorage::isPasswordCorrectLDAPNoLock(const String & user_name, const String & password, + const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const { - return user.authentication.isCorrectPasswordLDAP(password, user.getName(), external_authenticators, &role_search_params, &search_results); + return external_authenticators.checkLDAPCredentials(ldap_server, user_name, password, &role_search_params, &search_results); } @@ -521,7 +523,7 @@ UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & passw { auto user = memory_storage.read(*id); - if (!isPasswordCorrectLDAPNoLock(*user, password, external_authenticators, external_roles)) + if (!isPasswordCorrectLDAPNoLock(user->getName(), password, external_authenticators, external_roles)) throwInvalidPassword(); if (!isAddressAllowedImpl(*user, address)) @@ -540,7 +542,7 @@ UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & passw user->authentication = Authentication(Authentication::Type::LDAP_SERVER); user->authentication.setServerName(ldap_server); - if (!isPasswordCorrectLDAPNoLock(*user, password, external_authenticators, external_roles)) + if (!isPasswordCorrectLDAPNoLock(user->getName(), password, external_authenticators, external_roles)) throwInvalidPassword(); if (!isAddressAllowedImpl(*user, address)) diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index cce50fd03aa..b3d82d1e86b 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -69,7 +69,8 @@ private: void assignRolesNoLock(User & user, const LDAPSearchResultsList & external_roles, const std::size_t external_roles_hash) const; void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const LDAPSearchResultsList & external_roles) const; std::set mapExternalRolesNoLock(const LDAPSearchResultsList & external_roles) const; - bool isPasswordCorrectLDAPNoLock(const User & user, const 
String & password, const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const; + bool isPasswordCorrectLDAPNoLock(const String & user_name, const String & password, + const ExternalAuthenticators & external_authenticators, LDAPSearchResultsList & search_results) const; mutable std::recursive_mutex mutex; AccessControlManager * access_control_manager = nullptr; diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index cba74fbbb89..41756aebb9a 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -271,8 +271,8 @@ void LDAPClient::openConnection() { case LDAPServerParams::SASLMechanism::SIMPLE: { - const auto escaped_username = escapeForLDAP(params.user); - const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{username}", escaped_username} }); + const auto escaped_user_name = escapeForLDAP(params.user); + const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", escaped_user_name} }); ::berval cred; cred.bv_val = const_cast(params.password.c_str()); @@ -314,10 +314,10 @@ LDAPSearchResults LDAPClient::search(const LDAPSearchParams & search_params) case LDAPSearchParams::Scope::CHILDREN: scope = LDAP_SCOPE_CHILDREN; break; } - const auto escaped_username = escapeForLDAP(params.user); - const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{username}", escaped_username} }); - const auto base_dn = replacePlaceholders(search_params.base_dn, { {"{username}", escaped_username}, {"{bind_dn}", bind_dn} }); - const auto search_filter = replacePlaceholders(search_params.search_filter, { {"{username}", escaped_username}, {"{bind_dn}", bind_dn}, {"{base_dn}", base_dn} }); + const auto escaped_user_name = escapeForLDAP(params.user); + const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", escaped_user_name} }); + const auto base_dn = replacePlaceholders(search_params.base_dn, { {"{user_name}", escaped_user_name}, {"{bind_dn}", bind_dn} }); + const auto search_filter = replacePlaceholders(search_params.search_filter, { {"{user_name}", escaped_user_name}, {"{bind_dn}", bind_dn}, {"{base_dn}", base_dn} }); char * attrs[] = { const_cast(search_params.attribute.c_str()), nullptr }; ::timeval timeout = { params.search_timeout.count(), 0 }; LDAPMessage* msgs = nullptr; diff --git a/src/Access/LDAPParams.h b/src/Access/LDAPParams.h index 426e81719bc..5181b2d1621 100644 --- a/src/Access/LDAPParams.h +++ b/src/Access/LDAPParams.h @@ -23,10 +23,19 @@ struct LDAPSearchParams }; String base_dn; + Scope scope = Scope::SUBTREE; String search_filter; String attribute = "cn"; - Scope scope = Scope::SUBTREE; String prefix; + + void combineHash(std::size_t & seed) const + { + boost::hash_combine(seed, base_dn); + boost::hash_combine(seed, static_cast(scope)); + boost::hash_combine(seed, search_filter); + boost::hash_combine(seed, attribute); + boost::hash_combine(seed, prefix); + } }; using LDAPSearchParamsList = std::vector; @@ -98,18 +107,13 @@ struct LDAPServerParams std::chrono::seconds search_timeout{20}; std::uint32_t search_limit = 100; - std::size_t getCoreHash() const + void combineCoreHash(std::size_t & seed) const { - std::size_t seed = 0; - boost::hash_combine(seed, host); boost::hash_combine(seed, port); - boost::hash_combine(seed, auth_dn_prefix); - boost::hash_combine(seed, auth_dn_suffix); + boost::hash_combine(seed, bind_dn); boost::hash_combine(seed, user); boost::hash_combine(seed, password); - - return seed; } }; diff --git 
a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index 6d8a97e8611..e1ee4f99545 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -77,7 +77,7 @@ def verify_ldap_user_exists(server, username, password): with By("searching LDAP database"): ldap_node = current().context.cluster.node(server) r = ldap_node.command( - f"ldapwhoami -H ldap://localhost -D 'cn={username},ou=users,dc=company,dc=com' -w {password}") + f"ldapwhoami -H ldap://localhost -D 'cn={user_name},ou=users,dc=company,dc=com' -w {password}") assert r.exitcode == 0, error() def create_ldap_external_user_directory_config_content(server=None, roles=None, **kwargs): From 31105670a8a62050f87780f595ac9c2fb2e8492d Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Wed, 6 Jan 2021 18:53:14 +0800 Subject: [PATCH 044/697] Introduce mapContains, mapKeys, mapValues functions for Map data type --- src/Functions/map.cpp | 161 ++++++++++++++++++ .../0_stateless/01651_map_functions.reference | 16 ++ .../0_stateless/01651_map_functions.sql | 21 +++ 3 files changed, 198 insertions(+) create mode 100644 tests/queries/0_stateless/01651_map_functions.reference create mode 100644 tests/queries/0_stateless/01651_map_functions.sql diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 5993ab3706e..dd74d3efa47 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -1,20 +1,28 @@ #include #include +#include #include #include #include #include #include +#include +#include #include #include #include +#include +#include +#include "array/arrayIndex.h" + namespace DB { namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -130,11 +138,164 @@ public: } }; +struct NameMapContains { static constexpr auto name = "mapContains"; }; + +class FunctionMapContains : public IFunction +{ +public: + static constexpr auto name = NameMapContains::name; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return NameMapContains::name ; + } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); + + if (!map_type) + throw Exception{"First argument for function " + getName() + " must be a map.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto key_type = map_type->getKeyType(); + + if (!(isNumber(arguments[1].type) && isNumber(key_type)) + && key_type->getName() != arguments[1].type->getName()) + throw Exception{"Second argument for function " + getName() + " must be a " + key_type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnMap * col_map = typeid_cast(arguments[0].column.get()); + if (!col_map) + return nullptr; + + const auto & nested_column = col_map->getNestedColumn(); + const auto & keys_data = col_map->getNestedData().getColumn(0); + + /// Prepare 
arguments to call arrayIndex for check has the array element. + ColumnsWithTypeAndName new_arguments = + { + { + ColumnArray::create(keys_data.getPtr(), nested_column.getOffsetsPtr()), + std::make_shared(result_type), + "" + }, + arguments[1] + }; + + return FunctionArrayIndex().executeImpl(new_arguments, result_type, input_rows_count); + } +}; + +class FunctionMapKeys : public IFunction +{ +public: + static constexpr auto name = "mapKeys"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name ; + } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); + + if (!map_type) + throw Exception{"First argument for function " + getName() + " must be a map.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto key_type = map_type->getKeyType(); + + return std::make_shared(key_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + { + const ColumnMap * col_map = typeid_cast(arguments[0].column.get()); + if (!col_map) + return nullptr; + + const auto & nested_column = col_map->getNestedColumn(); + const auto & keys_data = col_map->getNestedData().getColumn(0); + + return ColumnArray::create(keys_data.getPtr(), nested_column.getOffsetsPtr()); + } +}; + +class FunctionMapValues : public IFunction +{ +public: + static constexpr auto name = "mapValues"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name ; + } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); + + if (!map_type) + throw Exception{"First argument for function " + getName() + " must be a map.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto value_type = map_type->getValueType(); + + return std::make_shared(value_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + { + const ColumnMap * col_map = typeid_cast(arguments[0].column.get()); + if (!col_map) + return nullptr; + + const auto & nested_column = col_map->getNestedColumn(); + const auto & values_data = col_map->getNestedData().getColumn(1); + + return ColumnArray::create(values_data.getPtr(), nested_column.getOffsetsPtr()); + } +}; + } void registerFunctionsMap(FunctionFactory & factory) { factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/tests/queries/0_stateless/01651_map_functions.reference b/tests/queries/0_stateless/01651_map_functions.reference new file mode 100644 index 00000000000..efcd9ce8bcd --- /dev/null +++ 
b/tests/queries/0_stateless/01651_map_functions.reference @@ -0,0 +1,16 @@ +1 +1 +0 +1 +0 +0 +1 +0 +['name','age'] +['name','gender'] +1 0 0 +1 0 1 +1 0 0 +[232] +[233] +[234] diff --git a/tests/queries/0_stateless/01651_map_functions.sql b/tests/queries/0_stateless/01651_map_functions.sql new file mode 100644 index 00000000000..30ca3a4aeea --- /dev/null +++ b/tests/queries/0_stateless/01651_map_functions.sql @@ -0,0 +1,21 @@ +set allow_experimental_map_type = 1; + +-- String type +drop table if exists table_map; +create table table_map (a Map(String, String), b String) engine = Memory; +insert into table_map values ({'name':'zhangsan', 'age':'10'}, 'name'), ({'name':'lisi', 'gender':'female'},'age'); +select mapContains(a, 'name') from table_map; +select mapContains(a, 'gender') from table_map; +select mapContains(a, 'abc') from table_map; +select mapContains(a, b) from table_map; +select mapContains(a, 10) from table_map; -- { serverError 43 } +select mapKeys(a) from table_map; +drop table if exists table_map; + +CREATE TABLE table_map (a Map(UInt8, Int), b UInt8, c UInt32) engine = MergeTree order by tuple(); +insert into table_map select map(number, number), number, number from numbers(1000, 3); +select mapContains(a, b), mapContains(a, c), mapContains(a, 233) from table_map; +select mapContains(a, 'aaa') from table_map; -- { serverError 43 } +select mapContains(b, 'aaa') from table_map; -- { serverError 43 } +select mapKeys(a) from table_map; +drop table if exists table_map; From 41fe290b2bd9131e3895eb4ad9b0c1ddc6facbdf Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 9 Jan 2021 07:15:28 +0300 Subject: [PATCH 045/697] Update map.cpp --- src/Functions/map.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index dd74d3efa47..f1c5a26ce7d 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -138,17 +138,16 @@ public: } }; -struct NameMapContains { static constexpr auto name = "mapContains"; }; class FunctionMapContains : public IFunction { public: - static constexpr auto name = NameMapContains::name; + static constexpr auto name = "mapContains"; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { - return NameMapContains::name ; + return name; } size_t getNumberOfArguments() const override { return 2; } @@ -200,6 +199,7 @@ public: } }; + class FunctionMapKeys : public IFunction { public: @@ -208,7 +208,7 @@ public: String getName() const override { - return name ; + return name; } size_t getNumberOfArguments() const override { return 1; } @@ -244,6 +244,7 @@ public: } }; + class FunctionMapValues : public IFunction { public: @@ -252,7 +253,7 @@ public: String getName() const override { - return name ; + return name; } size_t getNumberOfArguments() const override { return 1; } From 9e3b4a67deb9bfcbb0d3d280ad96aa140b0380ed Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Mon, 11 Jan 2021 10:56:13 +0800 Subject: [PATCH 046/697] Add mapValues test --- tests/queries/0_stateless/01651_map_functions.reference | 3 +++ tests/queries/0_stateless/01651_map_functions.sql | 1 + 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01651_map_functions.reference b/tests/queries/0_stateless/01651_map_functions.reference index efcd9ce8bcd..ede7a6f5e68 100644 --- a/tests/queries/0_stateless/01651_map_functions.reference +++ b/tests/queries/0_stateless/01651_map_functions.reference @@ -14,3 +14,6 @@ [232] [233] [234] +[1000] +[1001] 
+[1002] diff --git a/tests/queries/0_stateless/01651_map_functions.sql b/tests/queries/0_stateless/01651_map_functions.sql index 30ca3a4aeea..c3b5bd7edd1 100644 --- a/tests/queries/0_stateless/01651_map_functions.sql +++ b/tests/queries/0_stateless/01651_map_functions.sql @@ -18,4 +18,5 @@ select mapContains(a, b), mapContains(a, c), mapContains(a, 233) from table_map; select mapContains(a, 'aaa') from table_map; -- { serverError 43 } select mapContains(b, 'aaa') from table_map; -- { serverError 43 } select mapKeys(a) from table_map; +select mapValues(a) from table_map; drop table if exists table_map; From bd05d9db2f01382b76b202177db27a5932810932 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 6 Jan 2021 00:57:05 +0300 Subject: [PATCH 047/697] Fix memory tracking for OPTIMIZE TABLE queries Because of BlockerInThread in MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex memory was accounted incorrectly and grows constantly. And IIUC there is no need in that blocker, since INSERT SELECT shares the same thread group. --- .../MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 8 -------- .../01641_memory_tracking_insert_optimize.reference | 0 .../01641_memory_tracking_insert_optimize.sql | 13 +++++++++++++ tests/queries/skip_list.json | 6 ++++-- 4 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference create mode 100644 tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index fd3338c8a70..9390180d51c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -180,14 +180,6 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); } - /** While filling index (index_columns), disable memory tracker. - * Because memory is allocated here (maybe in context of INSERT query), - * but then freed in completely different place (while merging parts), where query memory_tracker is not available. - * And otherwise it will look like excessively growing memory consumption in context of query. - * (observed in long INSERT SELECTs) - */ - MemoryTracker::BlockerInThread temporarily_disable_memory_tracker; - /// Write index. The index contains Primary Key value for each `index_granularity` row. 
for (const auto & granule : granules_to_write) { diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql new file mode 100644 index 00000000000..59b5098bbd1 --- /dev/null +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -0,0 +1,13 @@ +drop table if exists data_01641; + +create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 10)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0; + +-- peak memory usage is 170MiB +set max_memory_usage='200Mi'; +system stop merges data_01641; +insert into data_01641 select number, toString(number) from numbers(toUInt64(120e6)); + +-- FIXME: this limit does not work +set max_memory_usage='10Mi'; +system start merges data_01641; +optimize table data_01641 final; diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index cfbac463932..d380ab2ecf0 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -16,7 +16,8 @@ "01474_executable_dictionary", /// informational stderr from sanitizer at start "functions_bad_arguments", /// Too long for TSan "01603_read_with_backoff_bug", /// Too long for TSan - "01646_system_restart_replicas_smoke" /// RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan + "01646_system_restart_replicas_smoke", /// RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan + "01641_memory_tracking_insert_optimize" /// INSERT lots of rows is too heavy for TSan ], "address-sanitizer": [ "00877", @@ -62,7 +63,8 @@ "hyperscan", "01193_metadata_loading", "01473_event_time_microseconds", - "01396_inactive_replica_cleanup_nodes" + "01396_inactive_replica_cleanup_nodes", + "01641_memory_tracking_insert_optimize" /// INSERT lots of rows is too heavy in debug build ], "unbundled-build": [ "00429", From 82edbfb5816fc60df3dcc1d064834e7915531f67 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 6 Jan 2021 02:20:26 +0300 Subject: [PATCH 048/697] Account query memory limits and sampling for OPTIMIZE TABLE/merges --- src/Storages/MergeTree/MergeList.cpp | 12 ++++++++++++ .../01641_memory_tracking_insert_optimize.sql | 12 ++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index ba6c2a3d462..dbdfe650713 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -40,6 +40,18 @@ MergeListElement::MergeListElement(const std::string & database_, const std::str background_thread_memory_tracker = CurrentThread::getMemoryTracker(); if (background_thread_memory_tracker) { + /// From the query context it will be ("for thread") memory tracker with VariableContext::Thread level, + /// which does not have any limits and sampling settings configured. + /// And parent for this memory tracker should be ("(for query)") with VariableContext::Process level, + /// that has limits and sampling configured. 
+ MemoryTracker * parent; + if (background_thread_memory_tracker->level == VariableContext::Thread && + (parent = background_thread_memory_tracker->getParent()) && + parent != &total_memory_tracker) + { + background_thread_memory_tracker = parent; + } + background_thread_memory_tracker_prev_parent = background_thread_memory_tracker->getParent(); background_thread_memory_tracker->setParent(&memory_tracker); } diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql index 59b5098bbd1..f059da20755 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -7,7 +7,15 @@ set max_memory_usage='200Mi'; system stop merges data_01641; insert into data_01641 select number, toString(number) from numbers(toUInt64(120e6)); --- FIXME: this limit does not work -set max_memory_usage='10Mi'; +-- peak: +-- - is 21MiB if background merges already scheduled +-- - is ~60MiB otherwise +set max_memory_usage='80Mi'; system start merges data_01641; optimize table data_01641 final; + +-- definitely should fail +set max_memory_usage='1Mi'; +optimize table data_01641 final; -- { serverError 241 } + +drop table data_01641; From 05608687d6e3f188bb26f6e576b414f8f64fee99 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 12 Jan 2021 00:50:37 +0300 Subject: [PATCH 049/697] Get back memory tracking blocker in calculateAndSerializePrimaryIndex() But reduce scope, to avoid leaking too much memory, since there are old values in last_block_index_columns. The scope of the MemoryTracker::BlockerInThread has been increased in #8290 --- .../MergeTreeDataPartWriterOnDisk.cpp | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 9390180d51c..1339127e660 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -180,19 +180,30 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); } - /// Write index. The index contains Primary Key value for each `index_granularity` row. - for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) + /** While filling index (index_columns), disable memory tracker. + * Because memory is allocated here (maybe in context of INSERT query), + * but then freed in completely different place (while merging parts), where query memory_tracker is not available. + * And otherwise it will look like excessively growing memory consumption in context of query. + * (observed in long INSERT SELECTs) + */ + MemoryTracker::BlockerInThread temporarily_disable_memory_tracker; + + /// Write index. The index contains Primary Key value for each `index_granularity` row. 
+ for (const auto & granule : granules_to_write) { - for (size_t j = 0; j < primary_columns_num; ++j) + if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->serializeBinary(*primary_column.column, granule.start_row, *index_stream); + for (size_t j = 0; j < primary_columns_num; ++j) + { + const auto & primary_column = primary_index_block.getByPosition(j); + index_columns[j]->insertFrom(*primary_column.column, granule.start_row); + primary_column.type->serializeBinary(*primary_column.column, granule.start_row, *index_stream); + } } } } + /// store last index row to write final mark at the end of column for (size_t j = 0; j < primary_columns_num; ++j) last_block_index_columns[j] = primary_index_block.getByPosition(j).column; From 5822ee1f01e124a19ab9ab03e0ba85fd79914982 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Jan 2021 02:07:21 +0300 Subject: [PATCH 050/697] allow multiple rows TTL with WHERE expression --- src/DataStreams/TTLBlockInputStream.cpp | 4 + src/DataStreams/TTLDeleteAlgorithm.cpp | 6 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 ++ .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 87 +++++++++---------- .../MergeTree/MergeTreeDataPartTTLInfo.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 3 + src/Storages/StorageInMemoryMetadata.cpp | 10 +++ src/Storages/StorageInMemoryMetadata.h | 3 + src/Storages/TTLDescription.cpp | 20 +++-- src/Storages/TTLDescription.h | 5 +- .../0_stateless/01622_multiple_ttls.reference | 9 ++ .../0_stateless/01622_multiple_ttls.sql | 20 +++++ 12 files changed, 120 insertions(+), 55 deletions(-) create mode 100644 tests/queries/0_stateless/01622_multiple_ttls.reference create mode 100644 tests/queries/0_stateless/01622_multiple_ttls.sql diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 8c680f6875b..5154949ae71 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -44,6 +44,10 @@ TTLBlockInputStream::TTLBlockInputStream( algorithms.emplace_back(std::move(algorithm)); } + for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTL()) + algorithms.emplace_back(std::make_unique( + where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); + for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs()) algorithms.emplace_back(std::make_unique( group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_)); diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp index 9ff4eb767df..7227b40dad2 100644 --- a/src/DataStreams/TTLDeleteAlgorithm.cpp +++ b/src/DataStreams/TTLDeleteAlgorithm.cpp @@ -51,7 +51,11 @@ void TTLDeleteAlgorithm::execute(Block & block) void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const { - data_part->ttl_infos.table_ttl = new_ttl_info; + if (description.where_expression) + data_part->ttl_infos.rows_where_ttl[description.result_column] = new_ttl_info; + else + data_part->ttl_infos.table_ttl = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index cdf66ec43f6..e78ff09cfc4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ 
b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1142,6 +1142,12 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada return false; } + for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTL()) + { + if (!ttl_infos.rows_where_ttl.count(rows_where_desc.result_column)) + return false; + } + return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 3a0bb283b63..138e38e3b78 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -17,6 +17,12 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i updatePartMinMaxTTL(ttl_info.min, ttl_info.max); } + for (const auto & [name, ttl_info] : other_infos.rows_where_ttl) + { + rows_where_ttl[name].update(ttl_info); + updatePartMinMaxTTL(ttl_info.min, ttl_info.max); + } + for (const auto & [name, ttl_info] : other_infos.group_by_ttl) { group_by_ttl[name].update(ttl_info); @@ -91,6 +97,11 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) const JSON & group_by = json["group_by"]; fill_ttl_info_map(group_by, group_by_ttl); } + if (json.has("rows_where")) + { + const JSON & rows_where = json["rows_where"]; + fill_ttl_info_map(rows_where, rows_where_ttl); + } } @@ -127,61 +138,41 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const writeString("}", out); } - auto write_info_for_expression = [&out](const auto & name, const auto & info) + auto write_infos = [&out](const auto & infos, const auto & type, bool is_first) { - writeString(R"({"expression":)", out); - writeString(doubleQuoteString(name), out); - writeString(R"(,"min":)", out); - writeIntText(info.min, out); - writeString(R"(,"max":)", out); - writeIntText(info.max, out); - writeString("}", out); + if (!is_first) + writeString(",", out); + + writeString(type, out); + writeString(R"(:[)", out); + for (auto it = infos.begin(); it != infos.end(); ++it) + { + if (it != infos.begin()) + writeString(",", out); + + writeString(R"({"expression":)", out); + writeString(doubleQuoteString(it->first), out); + writeString(R"(,"min":)", out); + writeIntText(it->second.min, out); + writeString(R"(,"max":)", out); + writeIntText(it->second.max, out); + writeString("}", out); + } + writeString("]", out); }; - if (!moves_ttl.empty()) - { - if (!columns_ttl.empty() || table_ttl.min) - writeString(",", out); - writeString(R"("moves":[)", out); - for (auto it = moves_ttl.begin(); it != moves_ttl.end(); ++it) - { - if (it != moves_ttl.begin()) - writeString(",", out); + bool is_first = columns_ttl.empty() && !table_ttl.min; + write_infos(moves_ttl, "moves", is_first); - write_info_for_expression(it->first, it->second); - } - writeString("]", out); - } - if (!recompression_ttl.empty()) - { - if (!moves_ttl.empty() || !columns_ttl.empty() || table_ttl.min) - writeString(",", out); + is_first &= moves_ttl.empty(); + write_infos(recompression_ttl, "recompression", is_first); - writeString(R"("recompression":[)", out); - for (auto it = recompression_ttl.begin(); it != recompression_ttl.end(); ++it) - { - if (it != recompression_ttl.begin()) - writeString(",", out); + is_first &= recompression_ttl.empty(); + write_infos(group_by_ttl, "group_by", is_first); - write_info_for_expression(it->first, it->second); - } - writeString("]", out); - } - if (!group_by_ttl.empty()) - { - if (!moves_ttl.empty() || !columns_ttl.empty() || !recompression_ttl.empty() || table_ttl.min) - 
writeString(",", out); + is_first &= group_by_ttl.empty(); + write_infos(rows_where_ttl, "rows_where", is_first); - writeString(R"("group_by":[)", out); - for (auto it = group_by_ttl.begin(); it != group_by_ttl.end(); ++it) - { - if (it != group_by_ttl.begin()) - writeString(",", out); - - write_info_for_expression(it->first, it->second); - } - writeString("]", out); - } writeString("}", out); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index 8ab6d6089db..8b972116384 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -45,6 +45,8 @@ struct MergeTreeDataPartTTLInfos time_t part_min_ttl = 0; time_t part_max_ttl = 0; + TTLInfoMap rows_where_ttl; + TTLInfoMap moves_ttl; TTLInfoMap recompression_ttl; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 68c409eb85c..7c733c660d6 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -379,6 +379,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true); + for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTL()) + updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true); + for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 463a7c3b382..36947706474 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -148,6 +148,16 @@ bool StorageInMemoryMetadata::hasRowsTTL() const return table_ttl.rows_ttl.expression != nullptr; } +TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTL() const +{ + return table_ttl.rows_where_ttl; +} + +bool StorageInMemoryMetadata::hasRowsWhereTTL() const +{ + return !table_ttl.rows_where_ttl.empty(); +} + TTLDescriptions StorageInMemoryMetadata::getMoveTTLs() const { return table_ttl.move_ttl; diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index cf9f38fe135..4a00457f7eb 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -109,6 +109,9 @@ struct StorageInMemoryMetadata TTLDescription getRowsTTL() const; bool hasRowsTTL() const; + TTLDescriptions getRowsWhereTTL() const; + bool hasRowsWhereTTL() const; + /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of /// table TTL. TTLDescriptions getMoveTTLs() const; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index f0c936b10c2..6cef9e53097 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -260,6 +260,7 @@ TTLDescription TTLDescription::getTTLFromAST( TTLTableDescription::TTLTableDescription(const TTLTableDescription & other) : definition_ast(other.definition_ast ? 
other.definition_ast->clone() : nullptr) , rows_ttl(other.rows_ttl) + , rows_where_ttl(other.rows_where_ttl) , move_ttl(other.move_ttl) , recompression_ttl(other.recompression_ttl) , group_by_ttl(other.group_by_ttl) @@ -277,6 +278,7 @@ TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription & definition_ast.reset(); rows_ttl = other.rows_ttl; + rows_where_ttl = other.rows_where_ttl; move_ttl = other.move_ttl; recompression_ttl = other.recompression_ttl; group_by_ttl = other.group_by_ttl; @@ -296,16 +298,24 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( result.definition_ast = definition_ast->clone(); - bool seen_delete_ttl = false; + bool have_unconditional_delete_ttl = false; for (const auto & ttl_element_ptr : definition_ast->children) { auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key); if (ttl.mode == TTLMode::DELETE) { - if (seen_delete_ttl) - throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); - result.rows_ttl = ttl; - seen_delete_ttl = true; + if (!ttl.where_expression) + { + if (have_unconditional_delete_ttl) + throw Exception("More than one DELETE TTL expression without WHERE expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); + + have_unconditional_delete_ttl = true; + result.rows_ttl = ttl; + } + else + { + result.rows_where_ttl.emplace_back(std::move(ttl)); + } } else if (ttl.mode == TTLMode::RECOMPRESS) { diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 1cc3a832447..a2340ad6bcd 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -99,9 +99,12 @@ struct TTLTableDescription /// ^~~~~~~~~~~~~~~definition~~~~~~~~~~~~~~~^ ASTPtr definition_ast; - /// Rows removing TTL + /// Unconditional main removing rows TTL. Can be only one for table. TTLDescription rows_ttl; + /// Conditional removing rows TTLs. 
+ TTLDescriptions rows_where_ttl; + /// Moving data TTL (to other disks or volumes) TTLDescriptions move_ttl; diff --git a/tests/queries/0_stateless/01622_multiple_ttls.reference b/tests/queries/0_stateless/01622_multiple_ttls.reference new file mode 100644 index 00000000000..9b3ac02560c --- /dev/null +++ b/tests/queries/0_stateless/01622_multiple_ttls.reference @@ -0,0 +1,9 @@ +1970-10-10 2 +1970-10-10 5 +1970-10-10 8 +2000-10-10 1 +2000-10-10 2 +2000-10-10 4 +2000-10-10 5 +2000-10-10 7 +2000-10-10 8 diff --git a/tests/queries/0_stateless/01622_multiple_ttls.sql b/tests/queries/0_stateless/01622_multiple_ttls.sql new file mode 100644 index 00000000000..f86256150b5 --- /dev/null +++ b/tests/queries/0_stateless/01622_multiple_ttls.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS ttl_where; + +CREATE TABLE ttl_where +( + `d` Date, + `i` UInt32 +) +ENGINE = MergeTree +ORDER BY tuple() +TTL d + toIntervalYear(10) DELETE WHERE i % 3 = 0, + d + toIntervalYear(40) DELETE WHERE i % 3 = 1; + +INSERT INTO ttl_where SELECT toDate('2000-10-10'), number FROM numbers(10); +INSERT INTO ttl_where SELECT toDate('1970-10-10'), number FROM numbers(10); + +OPTIMIZE TABLE ttl_where FINAL; + +SELECT * FROM ttl_where ORDER BY d, i; + +DROP TABLE ttl_where; From 61d6a323dddd0c049c10ee1602c5fe75adf49f5b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Jan 2021 03:40:07 +0300 Subject: [PATCH 051/697] multiple TTL with GROUP BY --- src/Parsers/ASTTTLElement.cpp | 12 ++-- src/Parsers/ASTTTLElement.h | 2 +- src/Parsers/ExpressionElementParsers.cpp | 64 +++++++++++-------- src/Parsers/ExpressionElementParsers.h | 8 +++ src/Parsers/ParserAlterQuery.cpp | 29 --------- src/Parsers/ParserAlterQuery.h | 8 --- src/Storages/TTLDescription.cpp | 25 ++++++-- .../0_stateless/01622_multiple_ttls.reference | 13 ++++ .../0_stateless/01622_multiple_ttls.sql | 26 +++++++- 9 files changed, 113 insertions(+), 74 deletions(-) diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp index 39283a3168e..2d22c1b4307 100644 --- a/src/Parsers/ASTTTLElement.cpp +++ b/src/Parsers/ASTTTLElement.cpp @@ -20,7 +20,7 @@ ASTPtr ASTTTLElement::clone() const for (auto & expr : clone->group_by_key) expr = expr->clone(); - for (auto & [name, expr] : clone->group_by_aggregations) + for (auto & expr : clone->group_by_assignments) expr = expr->clone(); return clone; @@ -46,15 +46,15 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st settings.ostr << ", "; (*it)->formatImpl(settings, state, frame); } - if (!group_by_aggregations.empty()) + + if (!group_by_assignments.empty()) { settings.ostr << " SET "; - for (auto it = group_by_aggregations.begin(); it != group_by_aggregations.end(); ++it) + for (auto it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it) { - if (it != group_by_aggregations.begin()) + if (it != group_by_assignments.begin()) settings.ostr << ", "; - settings.ostr << it->first << " = "; - it->second->formatImpl(settings, state, frame); + (*it)->formatImpl(settings, state, frame); } } } diff --git a/src/Parsers/ASTTTLElement.h b/src/Parsers/ASTTTLElement.h index aadd019b59c..ce011d76c7b 100644 --- a/src/Parsers/ASTTTLElement.h +++ b/src/Parsers/ASTTTLElement.h @@ -18,7 +18,7 @@ public: String destination_name; ASTs group_by_key; - std::vector> group_by_aggregations; + ASTs group_by_assignments; ASTPtr recompression_codec; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 726e28005e3..0bcbcac302a 100644 --- 
a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1875,9 +1876,12 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserIdentifier parser_identifier; ParserStringLiteral parser_string_literal; ParserExpression parser_exp; - ParserExpressionList parser_expression_list(false); + ParserExpressionList parser_keys_list(false); ParserCodec parser_codec; + ParserList parser_assignment_list( + std::make_unique(), std::make_unique(TokenType::Comma)); + ASTPtr ttl_expr; if (!parser_exp.parse(pos, ttl_expr, expected)) return false; @@ -1911,9 +1915,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } ASTPtr where_expr; - ASTPtr ast_group_by_key; + ASTPtr group_by_key; ASTPtr recompression_codec; - std::vector> group_by_aggregations; + ASTPtr group_by_assignments; if (mode == TTLMode::MOVE) { @@ -1925,30 +1929,13 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (mode == TTLMode::GROUP_BY) { - if (!parser_expression_list.parse(pos, ast_group_by_key, expected)) + if (!parser_keys_list.parse(pos, group_by_key, expected)) return false; if (s_set.ignore(pos)) { - while (true) - { - if (!group_by_aggregations.empty() && !s_comma.ignore(pos)) - break; - - ASTPtr name; - ASTPtr value; - if (!parser_identifier.parse(pos, name, expected)) - return false; - if (!s_eq.ignore(pos)) - return false; - if (!parser_exp.parse(pos, value, expected)) - return false; - - String name_str; - if (!tryGetIdentifierNameInto(name, name_str)) - return false; - group_by_aggregations.emplace_back(name_str, std::move(value)); - } + if (!parser_assignment_list.parse(pos, group_by_assignments, expected)) + return false; } } else if (mode == TTLMode::DELETE && s_where.ignore(pos)) @@ -1972,8 +1959,8 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (mode == TTLMode::GROUP_BY) { - ttl_element->group_by_key = std::move(ast_group_by_key->children); - ttl_element->group_by_aggregations = std::move(group_by_aggregations); + ttl_element->group_by_key = std::move(group_by_key->children); + ttl_element->group_by_assignments = std::move(group_by_assignments->children); } if (mode == TTLMode::RECOMPRESS) @@ -2008,4 +1995,31 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, return false; } +bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto assignment = std::make_shared(); + node = assignment; + + ParserIdentifier p_identifier; + ParserToken s_equals(TokenType::Equals); + ParserExpression p_expression; + + ASTPtr column; + if (!p_identifier.parse(pos, column, expected)) + return false; + + if (!s_equals.ignore(pos, expected)) + return false; + + ASTPtr expression; + if (!p_expression.parse(pos, expression, expected)) + return false; + + tryGetIdentifierNameInto(column, assignment->column_name); + if (expression) + assignment->children.push_back(expression); + + return true; +} + } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 917f084a700..1eb17bfb0bd 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -468,4 +468,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/// Part of the UPDATE command or TTL with GROUP BY of the form: col_name = expr +class ParserAssignment : 
public IParserBase +{ +protected: + const char * getName() const override{ return "column assignment"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index f916537f438..5d20e27e486 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include @@ -651,34 +650,6 @@ bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expe } -bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto assignment = std::make_shared(); - node = assignment; - - ParserIdentifier p_identifier; - ParserToken s_equals(TokenType::Equals); - ParserExpression p_expression; - - ASTPtr column; - if (!p_identifier.parse(pos, column, expected)) - return false; - - if (!s_equals.ignore(pos, expected)) - return false; - - ASTPtr expression; - if (!p_expression.parse(pos, expression, expected)) - return false; - - tryGetIdentifierNameInto(column, assignment->column_name); - if (expression) - assignment->children.push_back(expression); - - return true; -} - - bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto query = std::make_shared(); diff --git a/src/Parsers/ParserAlterQuery.h b/src/Parsers/ParserAlterQuery.h index 514ef876430..b22b1c6ded2 100644 --- a/src/Parsers/ParserAlterQuery.h +++ b/src/Parsers/ParserAlterQuery.h @@ -63,12 +63,4 @@ public: }; -/// Part of the UPDATE command of the form: col_name = expr -class ParserAssignment : public IParserBase -{ -protected: - const char * getName() const override{ return "column assignment"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 6cef9e53097..06416bfbf36 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -7,12 +8,13 @@ #include #include #include +#include +#include #include #include #include - #include #include @@ -197,16 +199,31 @@ TTLDescription TTLDescription::getTTLFromAST( used_primary_key_columns_set.insert(pk_columns[i]); } - for (const auto & [name, _] : ttl_element->group_by_aggregations) + std::vector> aggregations; + for (const auto & ast : ttl_element->group_by_assignments) + { + const auto assignment = ast->as(); + auto expression = assignment.expression(); + + const auto * expression_func = expression->as(); + if (!expression_func || !AggregateFunctionFactory::instance().isAggregateFunctionName(expression_func->name)) + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "Invalid expression for assignment of column {}. 
Should be an aggregate function", assignment.column_name); + + auto type_literal = std::make_shared(columns.getPhysical(assignment.column_name).type->getName()); + expression = makeASTFunction("cast", expression->clone(), type_literal); + aggregations.emplace_back(assignment.column_name, std::move(expression)); + } + + for (const auto & [name, _] : aggregations) aggregation_columns_set.insert(name); - if (aggregation_columns_set.size() != ttl_element->group_by_aggregations.size()) + if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size()) throw Exception( "Multiple aggregations set for one column in TTL Expression", ErrorCodes::BAD_TTL_EXPRESSION); result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size()); - auto aggregations = ttl_element->group_by_aggregations; const auto & primary_key_expressions = primary_key.expression_list_ast->children; for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i) diff --git a/tests/queries/0_stateless/01622_multiple_ttls.reference b/tests/queries/0_stateless/01622_multiple_ttls.reference index 9b3ac02560c..d9ebb694584 100644 --- a/tests/queries/0_stateless/01622_multiple_ttls.reference +++ b/tests/queries/0_stateless/01622_multiple_ttls.reference @@ -1,3 +1,4 @@ +TTL WHERE 1970-10-10 2 1970-10-10 5 1970-10-10 8 @@ -7,3 +8,15 @@ 2000-10-10 5 2000-10-10 7 2000-10-10 8 +TTL GROUP BY +1970-10-01 0 4950 +2000-10-01 0 450 +2000-10-01 1 460 +2000-10-01 2 470 +2000-10-01 3 480 +2000-10-01 4 490 +2000-10-01 5 500 +2000-10-01 6 510 +2000-10-01 7 520 +2000-10-01 8 530 +2000-10-01 9 540 diff --git a/tests/queries/0_stateless/01622_multiple_ttls.sql b/tests/queries/0_stateless/01622_multiple_ttls.sql index f86256150b5..aa2eeb5759b 100644 --- a/tests/queries/0_stateless/01622_multiple_ttls.sql +++ b/tests/queries/0_stateless/01622_multiple_ttls.sql @@ -1,3 +1,4 @@ +SELECT 'TTL WHERE'; DROP TABLE IF EXISTS ttl_where; CREATE TABLE ttl_where @@ -10,11 +11,34 @@ ORDER BY tuple() TTL d + toIntervalYear(10) DELETE WHERE i % 3 = 0, d + toIntervalYear(40) DELETE WHERE i % 3 = 1; +-- This test will fail at 2040-10-10 + INSERT INTO ttl_where SELECT toDate('2000-10-10'), number FROM numbers(10); INSERT INTO ttl_where SELECT toDate('1970-10-10'), number FROM numbers(10); - OPTIMIZE TABLE ttl_where FINAL; SELECT * FROM ttl_where ORDER BY d, i; DROP TABLE ttl_where; + +SELECT 'TTL GROUP BY'; +DROP TABLE IF EXISTS ttl_group_by; + +CREATE TABLE ttl_group_by +( + `d` Date, + `i` UInt32, + `v` UInt64 +) +ENGINE = MergeTree +ORDER BY (toStartOfMonth(d), i % 10) +TTL d + toIntervalYear(10) GROUP BY toStartOfMonth(d), i % 10 SET d = any(toStartOfMonth(d)), i = any(i % 10), v = sum(v), + d + toIntervalYear(40) GROUP BY toStartOfMonth(d) SET d = any(toStartOfMonth(d)), v = sum(v); + +INSERT INTO ttl_group_by SELECT toDate('2000-10-10'), number, number FROM numbers(100); +INSERT INTO ttl_group_by SELECT toDate('1970-10-10'), number, number FROM numbers(100); +OPTIMIZE TABLE ttl_group_by FINAL; + +SELECT * FROM ttl_group_by ORDER BY d, i; + +DROP TABLE ttl_group_by; From 58b9ef5a10a6d208b3ba68798015b87096ed42c3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Jan 2021 17:04:03 +0300 Subject: [PATCH 052/697] fix TTL info serialization --- .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 
138e38e3b78..d1916f31cc3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -143,8 +143,8 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const if (!is_first) writeString(",", out); - writeString(type, out); - writeString(R"(:[)", out); + writeDoubleQuotedString(type, out); + writeString(":[", out); for (auto it = infos.begin(); it != infos.end(); ++it) { if (it != infos.begin()) @@ -162,16 +162,26 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const }; bool is_first = columns_ttl.empty() && !table_ttl.min; - write_infos(moves_ttl, "moves", is_first); + if (!moves_ttl.empty()) + { + write_infos(moves_ttl, "moves", is_first); + is_first = false; + } - is_first &= moves_ttl.empty(); - write_infos(recompression_ttl, "recompression", is_first); + if (!recompression_ttl.empty()) + { + write_infos(recompression_ttl, "recompression", is_first); + is_first = false; + } - is_first &= recompression_ttl.empty(); - write_infos(group_by_ttl, "group_by", is_first); + if (!group_by_ttl.empty()) + { + write_infos(group_by_ttl, "group_by", is_first); + is_first = false; + } - is_first &= group_by_ttl.empty(); - write_infos(rows_where_ttl, "rows_where", is_first); + if (!rows_where_ttl.empty()) + write_infos(rows_where_ttl, "rows_where", is_first); writeString("}", out); } From aed8c78d0d5ac77d7070bc39cda580ca6e92668f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Jan 2021 18:35:07 +0300 Subject: [PATCH 053/697] better check for existence of aggregate function --- src/Parsers/ExpressionElementParsers.cpp | 3 ++- src/Storages/TTLDescription.cpp | 25 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 0bcbcac302a..df67417d218 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1960,7 +1960,8 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (mode == TTLMode::GROUP_BY) { ttl_element->group_by_key = std::move(group_by_key->children); - ttl_element->group_by_assignments = std::move(group_by_assignments->children); + if (group_by_assignments) + ttl_element->group_by_assignments = std::move(group_by_assignments->children); } if (mode == TTLMode::RECOMPRESS) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 06416bfbf36..42fdd76fc83 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -81,6 +82,24 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin } } +class FindAggregateFunctionData +{ +public: + using TypeToVisit = ASTFunction; + bool has_aggregate_function = false; + + void visit(const ASTFunction & func, ASTPtr &) + { + /// Do not throw if found aggregate function inside another aggregate function, + /// because it will be checked, while creating expressions. 
+ if (AggregateFunctionFactory::instance().isAggregateFunctionName(func.name)) + has_aggregate_function = true; + } +}; + +using FindAggregateFunctionFinderMatcher = OneTypeMatcher; +using FindAggregateFunctionVisitor = InDepthNodeVisitor; + } TTLDescription::TTLDescription(const TTLDescription & other) @@ -205,8 +224,10 @@ TTLDescription TTLDescription::getTTLFromAST( const auto assignment = ast->as(); auto expression = assignment.expression(); - const auto * expression_func = expression->as(); - if (!expression_func || !AggregateFunctionFactory::instance().isAggregateFunctionName(expression_func->name)) + FindAggregateFunctionVisitor::Data data{false}; + FindAggregateFunctionVisitor(data).visit(expression); + + if (!data.has_aggregate_function) throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "Invalid expression for assignment of column {}. Should be an aggregate function", assignment.column_name); From 60b88986bf5e0a30412e0b4cbcbd822914ca6a18 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Jan 2021 19:42:49 +0300 Subject: [PATCH 054/697] minor changes near TTL computation --- src/DataStreams/ITTLAlgorithm.cpp | 8 ++++++-- src/DataStreams/ITTLAlgorithm.h | 7 ++++++- src/DataStreams/TTLAggregationAlgorithm.cpp | 4 ++-- src/DataStreams/TTLBlockInputStream.cpp | 7 +++++-- src/DataStreams/TTLColumnAlgorithm.cpp | 18 +++++++----------- src/DataStreams/TTLColumnAlgorithm.h | 4 +++- src/DataStreams/TTLDeleteAlgorithm.cpp | 4 ++-- src/DataStreams/TTLUpdateInfoAlgorithm.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 12 +++++------- src/Storages/TTLDescription.cpp | 4 ++-- 10 files changed, 39 insertions(+), 31 deletions(-) diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/DataStreams/ITTLAlgorithm.cpp index f0e98e9ab1c..7513e0c6ce0 100644 --- a/src/DataStreams/ITTLAlgorithm.cpp +++ b/src/DataStreams/ITTLAlgorithm.cpp @@ -25,7 +25,8 @@ bool ITTLAlgorithm::isTTLExpired(time_t ttl) const return (ttl && (ttl <= current_time)); } -ColumnPtr ITTLAlgorithm::extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column) +ColumnPtr ITTLAlgorithm::executeExpressionAndGetColumn( + const ExpressionActionsPtr & expression, const Block & block, const String & result_column) { if (!expression) return nullptr; @@ -37,7 +38,10 @@ ColumnPtr ITTLAlgorithm::extractRequieredColumn(const ExpressionActionsPtr & exp for (const auto & column_name : expression->getRequiredColumns()) block_copy.insert(block.getByName(column_name)); - expression->execute(block_copy); + /// Keep number of rows for const expression. + size_t num_rows = block.rows(); + expression->execute(block_copy, num_rows); + return block_copy.getByName(result_column).column; } diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/DataStreams/ITTLAlgorithm.h index 28a371e9289..429ca4bcc61 100644 --- a/src/DataStreams/ITTLAlgorithm.h +++ b/src/DataStreams/ITTLAlgorithm.h @@ -23,10 +23,15 @@ public: bool isMinTTLExpired() const { return force || isTTLExpired(old_ttl_info.min); } bool isMaxTTLExpired() const { return isTTLExpired(old_ttl_info.max); } + /** This function is needed to avoid a conflict between already calculated columns and columns that needed to execute TTL. + * If result column is absent in block, all required columns are copied to new block and expression is executed on new block. 
+ */ + static ColumnPtr executeExpressionAndGetColumn( + const ExpressionActionsPtr & expression, const Block & block, const String & result_column); + protected: bool isTTLExpired(time_t ttl) const; UInt32 getTimestampByIndex(const IColumn * column, size_t index) const; - static ColumnPtr extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column); const TTLDescription description; const TTLInfo old_ttl_info; diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/DataStreams/TTLAggregationAlgorithm.cpp index 6cc1ac00b7e..ebe08159c55 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.cpp +++ b/src/DataStreams/TTLAggregationAlgorithm.cpp @@ -56,8 +56,8 @@ void TTLAggregationAlgorithm::execute(Block & block) MutableColumns result_columns = header.cloneEmptyColumns(); MutableColumns aggregate_columns = header.cloneEmptyColumns(); - auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); - auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column); + auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); + auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); size_t rows_aggregated = 0; size_t current_key_start = 0; diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 5154949ae71..7dd5952bb07 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -60,19 +60,22 @@ TTLBlockInputStream::TTLBlockInputStream( for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs()) { ExpressionActionsPtr default_expression; + String default_column_name; auto it = column_defaults.find(name); if (it != column_defaults.end()) { const auto & column = storage_columns.get(name); auto default_ast = it->second.expression->clone(); - default_ast = setAlias(addTypeConversionToAST(std::move(default_ast), column.type->getName()), it->first); + default_ast = addTypeConversionToAST(std::move(default_ast), column.type->getName()); auto syntax_result = TreeRewriter(storage_.global_context).analyze(default_ast, metadata_snapshot_->getColumns().getAllPhysical()); default_expression = ExpressionAnalyzer{default_ast, syntax_result, storage_.global_context}.getActions(true); + default_column_name = default_ast->getColumnName(); } algorithms.emplace_back(std::make_unique( - description, old_ttl_infos.columns_ttl[name], current_time_, force_, name, default_expression)); + description, old_ttl_infos.columns_ttl[name], current_time_, + force_, name, default_expression, default_column_name)); } } diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp index afab3af62a7..140631ac0bf 100644 --- a/src/DataStreams/TTLColumnAlgorithm.cpp +++ b/src/DataStreams/TTLColumnAlgorithm.cpp @@ -9,10 +9,12 @@ TTLColumnAlgorithm::TTLColumnAlgorithm( time_t current_time_, bool force_, const String & column_name_, - const ExpressionActionsPtr & default_expression_) + const ExpressionActionsPtr & default_expression_, + const String & default_column_name_) : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) , column_name(column_name_) , default_expression(default_expression_) + , default_column_name(default_column_name_) { if (!isMinTTLExpired()) { @@ -38,17 +40,11 @@ void TTLColumnAlgorithm::execute(Block & block) if 
(isMaxTTLExpired()) return; - //// TODO: use extractRequiredColumn - ColumnPtr default_column; - if (default_expression) - { - Block block_with_defaults; - block_with_defaults = block; - default_expression->execute(block_with_defaults); - default_column = block_with_defaults.getByName(column_name).column->convertToFullColumnIfConst(); - } + auto default_column = executeExpressionAndGetColumn(default_expression, block, default_column_name); + if (default_column) + default_column = default_column->convertToFullColumnIfConst(); - auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); + auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); auto & column_with_type = block.getByName(column_name); const IColumn * values_column = column_with_type.column.get(); diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/DataStreams/TTLColumnAlgorithm.h index b2824dba9b0..3b1c199292d 100644 --- a/src/DataStreams/TTLColumnAlgorithm.h +++ b/src/DataStreams/TTLColumnAlgorithm.h @@ -14,7 +14,8 @@ public: time_t current_time_, bool force_, const String & column_name_, - const ExpressionActionsPtr & default_expression_); + const ExpressionActionsPtr & default_expression_, + const String & default_column_name_); void execute(Block & block) override; void finalize(const MutableDataPartPtr & data_part) const override; @@ -22,6 +23,7 @@ public: private: const String column_name; const ExpressionActionsPtr default_expression; + const String default_column_name; bool is_fully_empty = true; }; diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp index 7227b40dad2..c364bb06f3e 100644 --- a/src/DataStreams/TTLDeleteAlgorithm.cpp +++ b/src/DataStreams/TTLDeleteAlgorithm.cpp @@ -16,8 +16,8 @@ void TTLDeleteAlgorithm::execute(Block & block) if (!block || !isMinTTLExpired()) return; - auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); - auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column); + auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); + auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); MutableColumns result_columns; const auto & column_names = block.getNames(); diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp index ce4d4128eec..d5feb14658b 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp +++ b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp @@ -14,7 +14,7 @@ void TTLUpdateInfoAlgorithm::execute(Block & block) if (!block) return; - auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column); + auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); for (size_t i = 0; i < block.rows(); ++i) { UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 7c733c660d6..42fc24c8c8e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -95,23 +96,20 @@ void updateTTL( const Block & block, bool update_part_min_max_ttls) { - Block block_copy = block; - if 
(!block_copy.has(ttl_entry.result_column)) - ttl_entry.expression->execute(block_copy); + auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(ttl_entry.expression, block, ttl_entry.result_column); - const IColumn * column = block_copy.getByName(ttl_entry.result_column).column.get(); - if (const ColumnUInt16 * column_date = typeid_cast(column)) + if (const ColumnUInt16 * column_date = typeid_cast(ttl_column.get())) { const auto & date_lut = DateLUT::instance(); for (const auto & val : column_date->getData()) ttl_info.update(date_lut.fromDayNum(DayNum(val))); } - else if (const ColumnUInt32 * column_date_time = typeid_cast(column)) + else if (const ColumnUInt32 * column_date_time = typeid_cast(ttl_column.get())) { for (const auto & val : column_date_time->getData()) ttl_info.update(val); } - else if (const ColumnConst * column_const = typeid_cast(column)) + else if (const ColumnConst * column_const = typeid_cast(ttl_column.get())) { if (typeid_cast(&column_const->getDataColumn())) { diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 42fdd76fc83..19195e6ba6d 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -231,8 +232,7 @@ TTLDescription TTLDescription::getTTLFromAST( throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "Invalid expression for assignment of column {}. Should be an aggregate function", assignment.column_name); - auto type_literal = std::make_shared(columns.getPhysical(assignment.column_name).type->getName()); - expression = makeASTFunction("cast", expression->clone(), type_literal); + expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName()); aggregations.emplace_back(assignment.column_name, std::move(expression)); } From 0e903552a06b1628a1dc9b2ca7e6b0383d856fba Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 13 Jan 2021 17:04:27 +0300 Subject: [PATCH 055/697] fix TTLs with WHERE --- src/DataStreams/TTLBlockInputStream.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 6 +++--- src/Storages/StorageInMemoryMetadata.h | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 7dd5952bb07..4f141a03475 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -44,7 +44,7 @@ TTLBlockInputStream::TTLBlockInputStream( algorithms.emplace_back(std::move(algorithm)); } - for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTL()) + for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs()) algorithms.emplace_back(std::make_unique( where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 6ba351a4459..a937208b66a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1142,7 +1142,7 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada return false; } - for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTL()) + for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTLs()) { if 
(!ttl_infos.rows_where_ttl.count(rows_where_desc.result_column)) return false; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index b28f0979dc0..c3eafd2423e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -377,7 +377,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true); - for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTL()) + for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true); for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 36947706474..f810c73c02a 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -125,7 +125,7 @@ TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const bool StorageInMemoryMetadata::hasAnyTableTTL() const { - return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL(); + return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL() || hasAnyRowsWhereTTL(); } TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const @@ -148,12 +148,12 @@ bool StorageInMemoryMetadata::hasRowsTTL() const return table_ttl.rows_ttl.expression != nullptr; } -TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTL() const +TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const { return table_ttl.rows_where_ttl; } -bool StorageInMemoryMetadata::hasRowsWhereTTL() const +bool StorageInMemoryMetadata::hasAnyRowsWhereTTL() const { return !table_ttl.rows_where_ttl.empty(); } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 9a0f730f1f4..038416aff7d 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -109,8 +109,8 @@ struct StorageInMemoryMetadata TTLDescription getRowsTTL() const; bool hasRowsTTL() const; - TTLDescriptions getRowsWhereTTL() const; - bool hasRowsWhereTTL() const; + TTLDescriptions getRowsWhereTTLs() const; + bool hasAnyRowsWhereTTL() const; /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of /// table TTL. From eda9ca82030a2e74cd808899316090ddf3fdf1e6 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Wed, 13 Jan 2021 22:52:09 +0300 Subject: [PATCH 056/697] Creating multiword-types.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Создал страницу multiword-types.md и обновил таблицу соответствия типов данных. 
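A quick way to check the correspondence documented on the new page, assuming a running local server; the table name multiword_demo and its columns are made up for illustration. Per the table added below, DOUBLE PRECISION should be reported back as Float64 and CHAR VARYING as String:

    clickhouse-client --query "CREATE TABLE multiword_demo (price DOUBLE PRECISION, label CHAR VARYING) ENGINE = Memory"
    clickhouse-client --query "DESCRIBE TABLE multiword_demo"   # expected types: Float64, String
    clickhouse-client --query "DROP TABLE multiword_demo"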
--- docs/en/sql-reference/ansi.md | 4 ++-- .../sql-reference/data-types/multiword-types.md | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 docs/en/sql-reference/data-types/multiword-types.md diff --git a/docs/en/sql-reference/ansi.md b/docs/en/sql-reference/ansi.md index fc759f9f79a..eb6e0152fb0 100644 --- a/docs/en/sql-reference/ansi.md +++ b/docs/en/sql-reference/ansi.md @@ -25,14 +25,14 @@ The following table lists cases when query feature works in ClickHouse, but beha |------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | **E011** | **Numeric data types** | **Partial**{.text-warning} | | | E011-01 | INTEGER and SMALLINT data types | Yes {.text-success} | | -| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Partial {.text-warning} | `FLOAT()`, `REAL` and `DOUBLE PRECISION` are not supported | +| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Partial {.text-warning} | `FLOAT()` and `REAL` are not supported | | E011-03 | DECIMAL and NUMERIC data types | Partial {.text-warning} | Only `DECIMAL(p,s)` is supported, not `NUMERIC` | | E011-04 | Arithmetic operators | Yes {.text-success} | | | E011-05 | Numeric comparison | Yes {.text-success} | | | E011-06 | Implicit casting among the numeric data types | No {.text-danger} | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast | | **E021** | **Character string types** | **Partial**{.text-warning} | | | E021-01 | CHARACTER data type | No {.text-danger} | | -| E021-02 | CHARACTER VARYING data type | No {.text-danger} | `String` behaves similarly, but without length limit in parentheses | +| E021-02 | CHARACTER VARYING data type | Yes {.text-danger} | | | E021-03 | Character literals | Partial {.text-warning} | No automatic concatenation of consecutive literals and character set support | | E021-04 | CHARACTER_LENGTH function | Partial {.text-warning} | No `USING` clause | | E021-05 | OCTET_LENGTH function | No {.text-danger} | `LENGTH` behaves similarly | diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md new file mode 100644 index 00000000000..ea6a12ac82e --- /dev/null +++ b/docs/en/sql-reference/data-types/multiword-types.md @@ -0,0 +1,17 @@ +--- +toc_priority: 61 +toc_title: Multiword Type Names +--- + +# Multiword Types {#multiword-types} + +When creating tables, you can also use data types with a name consisting of several words. This is necessary for better SQL compatibility. 
+ +## Multiword Types Support {#multiword-types-support} + +| Multiword types | Simple types | +|----------------------------------|--------------------------------------------------------------| +| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | +| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | + +[Original article](https://clickhouse.tech/docs/en/sql-reference/data-types/multiword-types/) From d1feb2f7616ab758a9712cbe5465810a15b711b6 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 14 Jan 2021 02:16:40 +0300 Subject: [PATCH 057/697] minor fixes --- .../utilities/clickhouse-benchmark.md | 10 +++++----- .../utilities/clickhouse-benchmark.md | 20 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index f7ea0aa1302..49c18b02e2d 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -41,14 +41,14 @@ clickhouse-benchmark [keys] < queries_file; ## Keys {#clickhouse-benchmark-keys} - `--query=WORD` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. -- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: `1`. -- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set `0`). Default value: `1`. +- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. +- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. - `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: `9000`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. -- `-i N`, `--iterations=N` — Total number of queries. Default value: `0` (repeat forever). +- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. +- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). - `-r`, `--randomize` — Random order of queries execution if there is more than one input query. - `-s`, `--secure` — Using `TLS` connection. -- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: `0` (time limit disabled). +- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. 
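For reference, a minimal invocation that exercises the keys documented above; the host, port and query text are placeholder values, and the trailing key shows the `--<setting>=VALUE` pass-through mentioned at the end of the page:

    clickhouse-benchmark -c 4 -i 1000 -h localhost -p 9000 --delay=1 \
        --query="SELECT sum(number) FROM numbers(1000000)" \
        --max_memory_usage=1048576

With more than one `-h`/`-p` pair, the same command runs in the comparison mode described above.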
diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 218e41c6a72..392ed859d58 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -41,25 +41,25 @@ clickhouse-benchmark [keys] < queries_file; ## Ключи {#clickhouse-benchmark-keys} - `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. -- `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: `1`. -- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями (чтобы отлючить сообщения, установите `0`). Значение по умолчанию: `1`. +- `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: 1. +- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями (чтобы отключить сообщения, установите 0). Значение по умолчанию: 1. - `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. -- `-p N`, `--port=N` — порт сервера. Значение по умолчанию: `9000`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. -- `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: `0` (вечно будет повторяться). +- `-p N`, `--port=N` — порт сервера. Значение по умолчанию: 9000. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. +- `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: 0 (вечно будет повторяться). - `-r`, `--randomize` — случайный порядок выполнения запросов при наличии более одного входного запроса. -- `-s`, `--secure` — использование `TLS` соединения. -- `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: `0` (лимит отключен). -- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: `5`. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. +- `-s`, `--secure` — используется `TLS` соединения. +- `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: 0 (лимит отключен). +- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: 5. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. - `--cumulative` — выводит совокупность данных, а не данные за интервал. - `--database=DATABASE_NAME` — имя базы данных ClickHouse. Значение по умолчанию: `default`. 
- `--json=FILEPATH` — формат вывода `JSON`. Когда этот ключ указан, `clickhouse-benchmark` выводит сообщение в указанный JSON-файл. - `--user=USERNAME` — имя пользователя ClickHouse. Значение по умолчанию: `default`. - `--password=PSWD` — пароль пользователя ClickHouse. Значение по умолчанию: пустая строка. -- `--stacktrace` — вывод трассировки стека. Когда этот ключ указан, `clickhouse-bencmark` выводит трассировку стека исключений. +- `--stacktrace` — вывод трассировки стека исключений. - `--stage=WORD` — стадия обработки запроса на сервере. ClickHouse останавливает обработку запроса и возвращает ответ `clickhouse-benchmark` на заданной стадии. Возможные значения: `complete`, `fetch_columns`, `with_mergeable_state`. Значение по умолчанию: `complete`. -- `--help` — показывает help-сообщение. +- `--help` — показывает справку. -Если нужно применить какие-нибудь [настройки](../../operations/settings/index.md) для запросов, их можно передать как ключ `--= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. +Если нужно применить [настройки](../../operations/settings/index.md) для запросов, их можно передать как ключ `--= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. ## Вывод {#clickhouse-benchmark-output} From 547c7af1b1c9b9271e1fcba3263f11951c6f8161 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 14 Jan 2021 17:31:35 +0300 Subject: [PATCH 058/697] fix checkpoint in PeekableReadBuffer over ConcatReadBuffer --- src/IO/PeekableReadBuffer.cpp | 24 +++++++++---------- src/IO/PeekableReadBuffer.h | 6 ++--- src/Interpreters/InterpreterInsertQuery.cpp | 1 - src/Interpreters/executeQuery.cpp | 1 - ...183_custom_separated_format_http.reference | 2 ++ .../01183_custom_separated_format_http.sh | 14 +++++++++++ 6 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/01183_custom_separated_format_http.reference create mode 100755 tests/queries/0_stateless/01183_custom_separated_format_http.sh diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index 8ad0e7b572e..8d07b07ddea 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -25,7 +25,7 @@ void PeekableReadBuffer::reset() checkStateCorrect(); peeked_size = 0; - checkpoint = nullptr; + checkpoint = std::nullopt; checkpoint_in_own_memory = false; if (!currentlyReadFromOwnMemory()) @@ -47,7 +47,7 @@ bool PeekableReadBuffer::peekNext() { /// Don't have to copy all data from sub-buffer if there is no data in own memory (checkpoint and pos are in sub-buffer) if (checkpoint) - copy_from = checkpoint; + copy_from = *checkpoint; bytes_to_copy = sub_buf.buffer().end() - copy_from; if (!bytes_to_copy) { @@ -57,7 +57,7 @@ bool PeekableReadBuffer::peekNext() bool res = sub_buf.next(); BufferBase::set(sub_buf.buffer().begin(), sub_buf.buffer().size(), sub_buf.offset()); if (checkpoint) - checkpoint = pos; + checkpoint.emplace(pos); checkStateCorrect(); return res; @@ -79,7 +79,7 @@ bool PeekableReadBuffer::peekNext() /// Otherwise, checkpoint now at the beginning of own memory if (checkpoint && useSubbufferOnly()) { - checkpoint = memory.data(); + checkpoint.emplace(memory.data()); checkpoint_in_own_memory = true; } if (currentlyReadFromOwnMemory()) @@ -115,9 +115,9 @@ void PeekableReadBuffer::rollbackToCheckpoint() if (!checkpoint) throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR); else if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) - pos = checkpoint; + pos = *checkpoint; else /// Checkpoint is in own memory and 
pos is not. Switch to reading from own memory - BufferBase::set(memory.data(), peeked_size, checkpoint - memory.data()); + BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data()); checkStateCorrect(); } @@ -169,7 +169,7 @@ void PeekableReadBuffer::checkStateCorrect() const { if (!peeked_size) throw DB::Exception("Checkpoint in empty own buffer", ErrorCodes::LOGICAL_ERROR); - if (currentlyReadFromOwnMemory() && pos < checkpoint) + if (currentlyReadFromOwnMemory() && pos < *checkpoint) throw DB::Exception("Current position in own buffer before checkpoint in own buffer", ErrorCodes::LOGICAL_ERROR); if (!currentlyReadFromOwnMemory() && pos < sub_buf.position()) throw DB::Exception("Current position in subbuffer less than sub_buf.position()", ErrorCodes::LOGICAL_ERROR); @@ -180,7 +180,7 @@ void PeekableReadBuffer::checkStateCorrect() const throw DB::Exception("Own buffer is not empty", ErrorCodes::LOGICAL_ERROR); if (currentlyReadFromOwnMemory()) throw DB::Exception("Current position in own buffer before checkpoint in subbuffer", ErrorCodes::LOGICAL_ERROR); - if (pos < checkpoint) + if (pos < *checkpoint) throw DB::Exception("Current position in subbuffer before checkpoint in subbuffer", ErrorCodes::LOGICAL_ERROR); } } @@ -202,7 +202,7 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append) bool need_update_pos = currentlyReadFromOwnMemory(); size_t offset = 0; if (need_update_checkpoint) - offset = checkpoint - memory.data(); + offset = *checkpoint - memory.data(); else if (need_update_pos) offset = this->offset(); @@ -216,7 +216,7 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append) memmove(memory.data(), memory.data() + offset, peeked_size); if (need_update_checkpoint) - checkpoint -= offset; + *checkpoint -= offset; if (need_update_pos) pos -= offset; } @@ -235,7 +235,7 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append) memory.resize(new_size_amortized); if (need_update_checkpoint) - checkpoint = memory.data() + offset; + checkpoint.emplace(memory.data() + offset); if (need_update_pos) { BufferBase::set(memory.data(), peeked_size, pos_offset); @@ -252,7 +252,7 @@ void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos() checkStateCorrect(); if (!checkpointInOwnMemory() || currentlyReadFromOwnMemory()) - return; /// is't already continuous + return; /// it's already continuous size_t bytes_to_append = pos - sub_buf.position(); resizeOwnMemoryIfNecessary(bytes_to_append); diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 62b6f08f621..ffc80489d24 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -38,7 +38,7 @@ public: /// Don't need to store unread data anymore peeked_size = 0; } - checkpoint = pos; + checkpoint.emplace(pos); // FIXME: we are checking checkpoint existence in few places (rollbackToCheckpoint/dropCheckpoint) // by simple if(checkpoint) but checkpoint can be nullptr after @@ -58,7 +58,7 @@ public: /// Don't need to store unread data anymore peeked_size = 0; } - checkpoint = nullptr; + checkpoint = std::nullopt; checkpoint_in_own_memory = false; } @@ -97,7 +97,7 @@ private: ReadBuffer & sub_buf; const size_t unread_limit; size_t peeked_size = 0; - Position checkpoint = nullptr; + std::optional checkpoint = std::nullopt; bool checkpoint_in_own_memory = false; }; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 742c9f6736f..3a76e81f1d4 100644 --- 
a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 5928da156f3..16dbae37f5d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/tests/queries/0_stateless/01183_custom_separated_format_http.reference b/tests/queries/0_stateless/01183_custom_separated_format_http.reference new file mode 100644 index 00000000000..61f15592b64 --- /dev/null +++ b/tests/queries/0_stateless/01183_custom_separated_format_http.reference @@ -0,0 +1,2 @@ +2021-Jan d1 d2 +1000000 1 diff --git a/tests/queries/0_stateless/01183_custom_separated_format_http.sh b/tests/queries/0_stateless/01183_custom_separated_format_http.sh new file mode 100755 index 00000000000..f981ef5b890 --- /dev/null +++ b/tests/queries/0_stateless/01183_custom_separated_format_http.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo 'DROP TABLE IF EXISTS mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'CREATE TABLE mydb (datetime String, d1 String, d2 String ) ENGINE=Memory' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo "2021-Jan^d1^d2" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20FORMAT%20CustomSeparated%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27" --data-binary @- +echo -n "" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20FORMAT%20CustomSeparated%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27" --data-binary @- +echo 'SELECT * FROM mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +printf "2021-Jan^d1^d2\n%.0s" {1..999999} | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20FORMAT%20CustomSeparated%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27" --data-binary @- +echo 'SELECT count(*), countDistinct(datetime, d1, d2) FROM mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'DROP TABLE mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- From ce086197b7a711c4c84e9fc6d4bd28ba2ce32a3e Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Thu, 14 Jan 2021 20:29:10 +0300 Subject: [PATCH 059/697] DOCSUP-5272: Fix query syntax for DOCSUP-4261 --- docs/en/operations/settings/settings.md | 6 +----- docs/ru/operations/settings/settings.md | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index d3a4d50d21c..4433c27e181 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2491,11 +2491,7 @@ Default value: 0. 
Consider the following query with aggregate functions: ```sql -SELECT - SUM(-1), - MAX(0) -FROM system.one -WHERE 0 +SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; ``` With `aggregate_functions_null_for_empty = 0` it would produce: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index aa549fc5776..21bb58ca01c 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2360,11 +2360,7 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow; Рассмотрим запрос с агрегирующими функциями: ```sql -SELECT - SUM(-1), - MAX(0) -FROM system.one -WHERE 0 +SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; ``` Результат запроса с настройкой `aggregate_functions_null_for_empty = 0`: From c448542b5c83d02861e49e6fc4b49002814a1c27 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Thu, 14 Jan 2021 20:36:10 +0300 Subject: [PATCH 060/697] DOCSUP-5272: Fix PR 17121 mispelling in RU --- docs/ru/engines/table-engines/mergetree-family/replication.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/mergetree-family/replication.md b/docs/ru/engines/table-engines/mergetree-family/replication.md index f17e1b035d4..a8a308b104f 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replication.md +++ b/docs/ru/engines/table-engines/mergetree-family/replication.md @@ -153,7 +153,7 @@ CREATE TABLE table_name ```xml /clickhouse/tables/{shard}/{database}/{table} -{replica} +{replica} ``` В этом случае можно опустить аргументы при создании таблиц: From 5c97e473931f20017d496694ab3451aaf8408b15 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Thu, 14 Jan 2021 20:50:32 +0300 Subject: [PATCH 061/697] DOCSUP-5272: Add PR#17213 translation to RU --- .../sql-reference/statements/create/table.md | 14 ++------------ .../sql-reference/statements/create/table.md | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index b1a5fdb19b5..1dd9238a9f2 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -114,23 +114,13 @@ You can define a [primary key](../../../engines/table-engines/mergetree-family/m - inside the column list ``` sql -CREATE TABLE db.table_name -( - name1 type1, name2 type2, ..., - PRIMARY KEY(expr1[, expr2,...])] -) -ENGINE = engine; +CREATE TABLE db.table_name (name1 type1, name2 type2, ..., PRIMARY KEY (expr1[, expr2,...])]) ENGINE = engine; ``` - outside the column list ``` sql -CREATE TABLE db.table_name -( - name1 type1, name2 type2, ... -) -ENGINE = engine -PRIMARY KEY(expr1[, expr2,...]); +CREATE TABLE db.table_name (name1 type1, name2 type2, ...) ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` You can't combine both ways in one query. diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index d54ec189a1a..0a3e187cc3b 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -22,6 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Описание столбца, это `name type`, в простейшем случае. Пример: `RegionID UInt32`. Также могут быть указаны выражения для значений по умолчанию - смотрите ниже. +При необходимости можно указать первичный ключ с одним или несколькими ключевыми выражениями. 
``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine] ``` @@ -80,6 +81,24 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... Отсутствует возможность задать значения по умолчанию для элементов вложенных структур данных. +## Первичный ключ {#primary-key} + +Вы можете определить [первичный ключ](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) при создании таблицы. Первичный ключ может быть указан двумя способами: + +- В списке столбцов: + +``` sql +CREATE TABLE db.table_name (name1 type1, name2 type2, ..., PRIMARY KEY (expr1[, expr2,...])]) ENGINE = engine; +``` + +- Вне списка столбцов: + +``` sql +CREATE TABLE db.table_name (name1 type1, name2 type2, ...) ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); +``` + +Вы не можете сочетать оба способа в одном запросе. + ### Ограничения (constraints) {#constraints} Наряду с объявлением столбцов можно объявить ограничения на значения в столбцах таблицы: From f2184eea5e333f6b9d1318954fa2961c5a83317e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 14 Jan 2021 21:05:08 +0300 Subject: [PATCH 062/697] try update cassandra driver library --- contrib/cassandra | 2 +- .../test_cassandra.py | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/contrib/cassandra b/contrib/cassandra index d10187efb25..8579657ce2a 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit d10187efb25b26da391def077edf3c6f2f3a23dd +Subproject commit 8579657ce2a945e27ea1ab4616c34281ca6d3845 diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py index 2d54d846169..0c69b7f7cbb 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py @@ -70,20 +70,14 @@ def started_cluster(): finally: cluster.shutdown() -# We have a lot of race conditions in cassandra library -# https://github.com/ClickHouse/ClickHouse/issues/15754. -# TODO fix them and enable tests as soon as possible. 
@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE) def test_simple(started_cluster, layout_name): - if not node.is_built_with_thread_sanitizer(): - simple_tester.execute(layout_name, node) + simple_tester.execute(layout_name, node) @pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX) def test_complex(started_cluster, layout_name): - if not node.is_built_with_thread_sanitizer(): - complex_tester.execute(layout_name, node) + complex_tester.execute(layout_name, node) @pytest.mark.parametrize("layout_name", LAYOUTS_RANGED) def test_ranged(started_cluster, layout_name): - if not node.is_built_with_thread_sanitizer(): - ranged_tester.execute(layout_name, node) + ranged_tester.execute(layout_name, node) From 812f8ee19701e14c1d4de0d0d88b02f8cfdd2f74 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Thu, 14 Jan 2021 21:10:32 +0300 Subject: [PATCH 063/697] DOCSUP-5272: primary key crosslink in doc --- docs/en/sql-reference/statements/create/table.md | 2 +- docs/ru/sql-reference/statements/create/table.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 1dd9238a9f2..95ac0252eaa 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ``` Creates a table named `name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine. -The structure of the table is a list of column descriptions, secondary indexes and constraints . If primary key is supported by the engine, it will be indicated as parameter for the table engine. +The structure of the table is a list of column descriptions, secondary indexes and constraints . If [primary key](#primary-key) is supported by the engine, it will be indicated as parameter for the table engine. A column description is `name type` in the simplest case. Example: `RegionID UInt32`. diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 0a3e187cc3b..e91a7f15903 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Описание столбца, это `name type`, в простейшем случае. Пример: `RegionID UInt32`. Также могут быть указаны выражения для значений по умолчанию - смотрите ниже. -При необходимости можно указать первичный ключ с одним или несколькими ключевыми выражениями. +При необходимости можно указать [первичный ключ](#primary-key) с одним или несколькими ключевыми выражениями. 
``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine] ``` From 588d9f0a5600f030a88e698e870404ec37faa263 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 15 Jan 2021 01:51:55 +0300 Subject: [PATCH 064/697] fix --- cmake/find/zlib.cmake | 1 + contrib/cassandra | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/find/zlib.cmake b/cmake/find/zlib.cmake index 9a82699dc3a..bd96424b60d 100644 --- a/cmake/find/zlib.cmake +++ b/cmake/find/zlib.cmake @@ -35,6 +35,7 @@ if (NOT ZLIB_FOUND AND NOT MISSING_INTERNAL_ZLIB_LIBRARY) set (ZLIB_INCLUDE_DIRECTORIES ${ZLIB_INCLUDE_DIR}) # for protobuf set (ZLIB_FOUND 1) # for poco set (ZLIB_LIBRARIES zlib CACHE INTERNAL "") + set (ZLIB_LIBRARY_NAME ${ZLIB_LIBRARIES}) # for cassandra set (ZLIB_NAME "${INTERNAL_ZLIB_NAME}") endif () diff --git a/contrib/cassandra b/contrib/cassandra index 8579657ce2a..fbbd9fc4c63 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 8579657ce2a945e27ea1ab4616c34281ca6d3845 +Subproject commit fbbd9fc4c634e9daad24714cd03cb390615d85ed From e57e2424821db11c600b4139cf1a0e050cc5f827 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 16 Jan 2021 22:56:07 +0300 Subject: [PATCH 065/697] fix --- base/harmful/harmful.c | 4 ++-- contrib/cassandra | 2 +- tests/tsan_suppressions.txt | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index df625a3e4d6..4032fbf3b90 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -118,7 +118,7 @@ TRAP(logout) TRAP(logwtmp) TRAP(lrand48) TRAP(mallinfo) -TRAP(mallopt) +//TRAP(mallopt) // Used by tsan TRAP(mblen) TRAP(mbrlen) TRAP(mbrtowc) @@ -193,7 +193,7 @@ TRAP(dbm_nextkey) TRAP(dbm_open) TRAP(dbm_store) TRAP(dirname) -TRAP(dlerror) +//TRAP(dlerror) // Used by tsan TRAP(ftw) TRAP(getc_unlocked) //TRAP(getenv) // Ok at program startup diff --git a/contrib/cassandra b/contrib/cassandra index fbbd9fc4c63..2935f6f15fe 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit fbbd9fc4c634e9daad24714cd03cb390615d85ed +Subproject commit 2935f6f15fea889899750560aa6331e9119e9dd0 diff --git a/tests/tsan_suppressions.txt b/tests/tsan_suppressions.txt index ccc36d876f7..668710a33d7 100644 --- a/tests/tsan_suppressions.txt +++ b/tests/tsan_suppressions.txt @@ -1,4 +1,2 @@ # looks like a bug in clang-11 thread sanitizer, detects normal data race with random FD in this method race:DB::LazyPipeFDs::close -# races in openSSL https://github.com/openssl/openssl/issues/11974 -fun:evp_cipher_cache_constants From 322a5e515300cef09ba52bef4f730e84dc8c1295 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sat, 16 Jan 2021 23:00:00 +0300 Subject: [PATCH 066/697] First draft --- docs/en/operations/caches.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 docs/en/operations/caches.md diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md new file mode 100644 index 00000000000..0107c340019 --- /dev/null +++ b/docs/en/operations/caches.md @@ -0,0 +1,24 @@ +--- +toc_priority: 65 +toc_title: Caches +--- + +# Cache Types {#cache-types} + +When performing queries, ClickHouse uses different caches. + +Main cache types: +- `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
+- `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. + +Additional cache types: +- DNS cache +- [regexp](../interfaces/formats.md#data-format-regexp) cache +- compiled expressions cache +- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache +- [dictionaries data cache](../sql-reference/dictionaries/index.md) + +Not directly used: +- page cache OS + +[Original article](https://clickhouse.tech/docs/en/operations/caches/) From f6d1f76b42bcc1a1161802fe961239a6ffff2f91 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 17 Jan 2021 04:30:02 +0300 Subject: [PATCH 067/697] Add some multiword types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Обновил таблицу ANSI и добавил новые типы данных. --- docs/en/sql-reference/ansi.md | 2 +- docs/en/sql-reference/data-types/multiword-types.md | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/ansi.md b/docs/en/sql-reference/ansi.md index eb6e0152fb0..5ca216d11fa 100644 --- a/docs/en/sql-reference/ansi.md +++ b/docs/en/sql-reference/ansi.md @@ -25,7 +25,7 @@ The following table lists cases when query feature works in ClickHouse, but beha |------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | **E011** | **Numeric data types** | **Partial**{.text-warning} | | | E011-01 | INTEGER and SMALLINT data types | Yes {.text-success} | | -| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Partial {.text-warning} | `FLOAT()` and `REAL` are not supported | +| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Yes {.text-warning} | | | E011-03 | DECIMAL and NUMERIC data types | Partial {.text-warning} | Only `DECIMAL(p,s)` is supported, not `NUMERIC` | | E011-04 | Arithmetic operators | Yes {.text-success} | | | E011-05 | Numeric comparison | Yes {.text-success} | | diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md index ea6a12ac82e..f55efcd7a51 100644 --- a/docs/en/sql-reference/data-types/multiword-types.md +++ b/docs/en/sql-reference/data-types/multiword-types.md @@ -12,6 +12,18 @@ When creating tables, you can also use data types with a name consisting of seve | Multiword types | Simple types | |----------------------------------|--------------------------------------------------------------| | DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | +| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | | CHAR VARYING | [String](../../sql-reference/data-types/string.md) | +| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | +| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | 
+| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | +| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | [Original article](https://clickhouse.tech/docs/en/sql-reference/data-types/multiword-types/) From 936020b381ceca5eacc62cf71b4a6c0de7842aa4 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 17 Jan 2021 16:25:20 +0300 Subject: [PATCH 068/697] Update the description of FINAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Задокументировал настройку max_final_threads и обновил описание запросов с FINAL. --- docs/en/operations/settings/settings.md | 15 +++++++++++---- docs/en/sql-reference/statements/select/from.md | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index d3a4d50d21c..b6c57a0a40f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2474,7 +2474,6 @@ Possible values: Default value: `0`. - ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty} Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility. @@ -2512,7 +2511,6 @@ With `aggregate_functions_null_for_empty = 1` the result would be: └───────────────┴──────────────┘ ``` - ## union_default_mode {#union-default-mode} Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`. @@ -2527,7 +2525,6 @@ Default value: `''`. See examples in [UNION](../../sql-reference/statements/select/union.md). - ## data_type_default_nullable {#data_type_default_nullable} Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). @@ -2539,7 +2536,6 @@ Possible values: Default value: `0`. - ## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} Enables special logic to perform merges on replicas. @@ -2559,4 +2555,15 @@ High values for that threshold may lead to replication delays. It can be useful when merges are CPU bounded not IO bounded (performing heavy data compression, calculating aggregate functions or default expressions that require a large amount of calculations, or just very high number of tiny merges). +## max_final_threads {#max-final-threads} + +Sets maximum number of threads to read from table with [FINAL](../../sql-reference\statements\select.md#select-from-final) modifier. + +Possible values: + +- Positive integer. +- 0 or 1 — Disabled. Executed in a single thread. + +Default value: `16`. 
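To make the new setting concrete, here is a minimal, illustrative sketch of how `max_final_threads` is meant to be combined with `FINAL`; the table name, schema and data are assumptions made for this example (they are not part of the patch), and whether the read is actually parallelized depends on the data layout:

```sql
-- Illustrative only: `test_rmt` and its rows are invented for this sketch.
CREATE TABLE test_rmt (key UInt64, value String) ENGINE = ReplacingMergeTree ORDER BY key;
INSERT INTO test_rmt VALUES (1, 'old'), (1, 'new'), (2, 'x');

-- Read the collapsed (deduplicated) state while capping the number of threads used for FINAL.
SELECT key, value FROM test_rmt FINAL SETTINGS max_final_threads = 8;
```

Capping the value per query keeps a heavy `FINAL` read from taking all available threads while still allowing some parallelism.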
+ [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 71586e15a31..b2a330a45bf 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -25,6 +25,8 @@ It is applicable when selecting data from tables that use the [MergeTree](../../ - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. +`SELECT` queries with `FINAL` are executed in parallel. The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used. + ### Drawbacks {#drawbacks} Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because: From ed3c0f0dd4d214b08f06e7f664bf8ea27a74495b Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 17 Jan 2021 17:35:52 +0300 Subject: [PATCH 069/697] Update settings.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Исправил ссылку. --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b6c57a0a40f..c8a72460eda 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2557,7 +2557,7 @@ It can be useful when merges are CPU bounded not IO bounded (performing heavy da ## max_final_threads {#max-final-threads} -Sets maximum number of threads to read from table with [FINAL](../../sql-reference\statements\select.md#select-from-final) modifier. +Sets maximum number of threads to read from table with [FINAL](../../sql-reference\statements\select\from.md#select-from-final) modifier. Possible values: From 73549d72856eb03abfad754ea39598465bb6fddd Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 17 Jan 2021 20:37:41 +0300 Subject: [PATCH 070/697] Update settings.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поправил ссылку. --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c8a72460eda..01038d9bad5 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2557,7 +2557,7 @@ It can be useful when merges are CPU bounded not IO bounded (performing heavy da ## max_final_threads {#max-final-threads} -Sets maximum number of threads to read from table with [FINAL](../../sql-reference\statements\select\from.md#select-from-final) modifier. +Sets maximum number of threads to read from table with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier. 
Possible values: From c1b8ab1e5a583cc0448a51277124930f7611f421 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 17 Jan 2021 21:13:27 +0300 Subject: [PATCH 071/697] Edit and translate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поправил английский вариант и перевел на русский язык. --- docs/en/interfaces/formats.md | 7 ++--- .../operations/utilities/clickhouse-copier.md | 4 +-- docs/ru/interfaces/formats.md | 28 +++++++++++++++++++ .../operations/utilities/clickhouse-copier.md | 10 +++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index da8224e00d8..d84e68f427c 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -515,9 +515,9 @@ Example: ## JSONAsString {#jsonasstring} -In this format, a single JSON object is interpreted as a single value. If input has several JSON objects (comma separated) they will be interpreted as a sepatate rows. +In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated) they will be interpreted as separate rows. -This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it. +This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it. 
**Example** @@ -526,7 +526,7 @@ Query: ``` sql DROP TABLE IF EXISTS json_as_string; CREATE TABLE json_as_string (json String) ENGINE = Memory; -INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1} +INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}; SELECT * FROM json_as_string; ``` @@ -540,7 +540,6 @@ Result: └───────────────────────────────────┘ ``` - ## JSONCompact {#jsoncompact} ## JSONCompactString {#jsoncompactstring} diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index 4137bd6f334..056b06271ef 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -71,8 +71,8 @@ Parameters: diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index ea8df043357..9dec8a9c36c 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -24,6 +24,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT | [Vertical](#vertical) | ✗ | ✔ | | [VerticalRaw](#verticalraw) | ✗ | ✔ | | [JSON](#json) | ✗ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | | [JSONString](#jsonstring) | ✗ | ✔ | | [JSONCompact](#jsoncompact) | ✗ | ✔ | | [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | @@ -490,6 +491,33 @@ ClickHouse поддерживает [NULL](../sql-reference/syntax.md), кото } ``` +## JSONAsString {#jsonasstring} + +В этом формате один объект JSON интерпретируется как одно значение. Если входные данные имеют несколько объектов JSON, разделенных запятой, то они будут интерпретироваться как отдельные строки. + +В этом формате парситься может только таблица с единственным полем типа [String](../sql-reference/data-types/string.md). Остальные столбцы должны быть заданы как [DEFAULT](../sql-reference/statements/create/table.md#default) или [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), либо отсутствовать. Как только вы соберете весь объект JSON в строку, для его обработки вы можете использовать [функции для работы с JSON](../sql-reference/functions/json-functions.md). + +**Пример** + +Запрос: + +``` sql +DROP TABLE IF EXISTS json_as_string; +CREATE TABLE json_as_string (json String) ENGINE = Memory; +INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}; +SELECT * FROM json_as_string; +``` + +Результат: + +``` text +┌─json──────────────────────────────┐ +│ {"foo":{"bar":{"x":"y"},"baz":1}} │ +│ {} │ +│ {"any json stucture":1} │ +└───────────────────────────────────┘ +``` + ## JSONCompact {#jsoncompact} ## JSONCompactString {#jsoncompactstring} diff --git a/docs/ru/operations/utilities/clickhouse-copier.md b/docs/ru/operations/utilities/clickhouse-copier.md index 64e3c1eee12..243ad7f379b 100644 --- a/docs/ru/operations/utilities/clickhouse-copier.md +++ b/docs/ru/operations/utilities/clickhouse-copier.md @@ -67,11 +67,21 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas + false 127.0.0.1 9000 + ... From d9fb9fdd2dc3522d3eaae0539c762a9cbcd2b02a Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 17 Jan 2021 23:53:39 +0300 Subject: [PATCH 072/697] Update formats.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Правлю ссылки. 
--- docs/ru/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 9dec8a9c36c..69a856a4e2d 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -495,7 +495,7 @@ ClickHouse поддерживает [NULL](../sql-reference/syntax.md), кото В этом формате один объект JSON интерпретируется как одно значение. Если входные данные имеют несколько объектов JSON, разделенных запятой, то они будут интерпретироваться как отдельные строки. -В этом формате парситься может только таблица с единственным полем типа [String](../sql-reference/data-types/string.md). Остальные столбцы должны быть заданы как [DEFAULT](../sql-reference/statements/create/table.md#default) или [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), либо отсутствовать. Как только вы соберете весь объект JSON в строку, для его обработки вы можете использовать [функции для работы с JSON](../sql-reference/functions/json-functions.md). +В этом формате парситься может только таблица с единственным полем типа [String](../sql-reference/data-types/string.md). Остальные столбцы должны быть заданы как [DEFAULT](../sql-reference/statements/create/table.md#create-default-values) или [MATERIALIZED](../sql-reference/statements/create/table.md#create-default-values), либо отсутствовать. Как только вы соберете весь объект JSON в строку, для его обработки вы можете использовать [функции для работы с JSON](../sql-reference/functions/json-functions.md). **Пример** From 30ad216b0f9d8982ea9b23906e1a134943b141f0 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 18 Jan 2021 00:00:54 +0300 Subject: [PATCH 073/697] Update formats.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Внес небольшие поправки. 
--- docs/en/interfaces/formats.md | 2 +- docs/ru/interfaces/formats.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d84e68f427c..11291d61300 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -526,7 +526,7 @@ Query: ``` sql DROP TABLE IF EXISTS json_as_string; CREATE TABLE json_as_string (json String) ENGINE = Memory; -INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}; +INSERT INTO json_as_string (json) FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1} SELECT * FROM json_as_string; ``` diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 69a856a4e2d..97b72f79c86 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -504,7 +504,7 @@ ClickHouse поддерживает [NULL](../sql-reference/syntax.md), кото ``` sql DROP TABLE IF EXISTS json_as_string; CREATE TABLE json_as_string (json String) ENGINE = Memory; -INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}; +INSERT INTO json_as_string (json) FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1} SELECT * FROM json_as_string; ``` From 6a78b10e0b1ef3e341dfc7959ef24b7dede0dc1d Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Mon, 18 Jan 2021 10:58:07 +0800 Subject: [PATCH 074/697] fix build error --- src/Functions/map.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index f1c5a26ce7d..2561f06d9b8 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -139,15 +139,17 @@ public: }; +struct NameMapContains { static constexpr auto name = "mapContains"; }; + class FunctionMapContains : public IFunction { public: - static constexpr auto name = "mapContains"; + static constexpr auto name = NameMapContains::name; static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { - return name; + return NameMapContains::name; } size_t getNumberOfArguments() const override { return 2; } From e495284a0a5ecfeb0e0ead42f3eb8f57a8fac7aa Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 18 Jan 2021 16:43:00 +0300 Subject: [PATCH 075/697] Edit and translate to Russian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Внес поправки в английскую версию и выполнил перевод на русский язык --- .../database-engines/materialize-mysql.md | 31 ++-- docs/en/whats-new/roadmap.md | 2 +- docs/ru/engines/database-engines/index.md | 9 +- .../database-engines/materialize-mysql.md | 157 ++++++++++++++++++ docs/ru/whats-new/roadmap.md | 11 +- 5 files changed, 191 insertions(+), 19 deletions(-) create mode 100644 docs/ru/engines/database-engines/materialize-mysql.md diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index 964668a2d9d..e1fc83cdab5 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -5,15 +5,15 @@ toc_title: MaterializeMySQL # MaterializeMySQL {#materialize-mysql} - Creates ClickHouse database with all the tables existing in MySQL, and all the data in those tables. +Creates ClickHouse database with all the tables existing in MySQL, and all the data in those tables. - ClickHouse server works as MySQL replica. 
It reads binlog and performs DDL and DML queries. +ClickHouse server works as MySQL replica. It reads binlog and performs DDL and DML queries. ## Creating a Database {#creating-a-database} ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...] +ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]; ``` **Engine Parameters** @@ -25,12 +25,12 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor ## Virtual columns {#virtual-columns} - When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns. +When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns. - - `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md). - - `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values: - - `1` — Row is not deleted, - - `-1` — Row is deleted. +- `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md). +- `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values: + - `1` — Row is not deleted, + - `-1` — Row is deleted. ## Data Types Support {#data_types-support} @@ -61,7 +61,7 @@ Other types are not supported. If MySQL table contains a column of such type, Cl MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse some DDL query, the query is ignored. -### Data Replication {#data-replication} +### Data Replication {#data-replication} MaterializeMySQL does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication: @@ -77,7 +77,7 @@ MaterializeMySQL does not support direct `INSERT`, `DELETE` and `UPDATE` queries - If `_version` is not specified in the `SELECT` query, [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used. So only rows with `MAX(_version)` are selected. -- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default, so the deleted rows are not included into the result set. +- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default. So the deleted rows are not included into the result set. ### Index Conversion {#index-conversion} @@ -85,12 +85,12 @@ MySQL `PRIMARY KEY` and `INDEX` clauses are converted into `ORDER BY` tuples in ClickHouse has only one physical order, which is determined by `ORDER BY` clause. To create a new physical order, use [materialized views](../../sql-reference/statements/create/view.md#materialized). - **Notes** +**Notes** - - Rows with `_sign=-1` are not deleted physically from the tables. - - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine. - - Replication can be easily broken. - - Manual operations on database and tables are forbidden. 
+- Rows with `_sign=-1` are not deleted physically from the tables. +- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine. +- Replication can be easily broken. +- Manual operations on database and tables are forbidden. ## Examples of Use {#examples-of-use} @@ -105,6 +105,7 @@ mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16); mysql> UPDATE db.test SET c='Wow!', b=222; mysql> SELECT * FROM test; ``` + ```text +---+------+------+ | a | b | c | diff --git a/docs/en/whats-new/roadmap.md b/docs/en/whats-new/roadmap.md index 9024afb046b..4abc36b5136 100644 --- a/docs/en/whats-new/roadmap.md +++ b/docs/en/whats-new/roadmap.md @@ -5,6 +5,6 @@ toc_title: Roadmap # Roadmap {#roadmap} -The roadmap for year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623). +The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623). {## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##} diff --git a/docs/ru/engines/database-engines/index.md b/docs/ru/engines/database-engines/index.md index d3dd729e302..16608d9fd29 100644 --- a/docs/ru/engines/database-engines/index.md +++ b/docs/ru/engines/database-engines/index.md @@ -4,7 +4,6 @@ toc_priority: 27 toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" --- - # Движки баз данных {#dvizhki-baz-dannykh} Движки баз данных обеспечивают работу с таблицами. @@ -13,4 +12,10 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" Также можно использовать следующие движки баз данных: -- [MySQL](mysql.md) +- [MySQL](../../engines/database-engines/mysql.md) + +- [Lazy](../../engines/database-engines/lazy.md) + +- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/) diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md new file mode 100644 index 00000000000..24ddd2218c5 --- /dev/null +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -0,0 +1,157 @@ +--- +toc_priority: 29 +toc_title: MaterializeMySQL +--- + +# MaterializeMySQL {#materialize-mysql} + +Создает базу данных ClickHouse со всеми таблицами, существующими в MySQL, и всеми данными в этих таблицах. + +Сервер ClickHouse работает как реплика MySQL. Он читает файл binlog и выполняет DDL and DML-запросы. + +## Создание базы данных {#creating-a-database} + +``` sql +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] +ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]; +``` + +**Параметры движка** + +- `host:port` — адрес сервера MySQL. +- `database` — имя базы данных на удалённом сервере. +- `user` — пользователь MySQL. +- `password` — пароль пользователя. + +## Виртуальные столбцы {#virtual-columns} + +При работе с движком баз данных `MaterializeMySQL` таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) используются с виртуальными столбцами `_sign` и `_version`. + +- `_version` — счетчик транзакций. Тип [UInt64](../../sql-reference/data-types/int-uint.md). +- `_sign` — метка удаления. Тип [Int8](../../sql-reference/data-types/int-uint.md). Возможные значения: + - `1` — строка не удалена, + - `-1` — строка удалена. 
+ +## Поддержка типов данных {#data_types-support} + +| MySQL | ClickHouse | +|-------------------------|--------------------------------------------------------------| +| TINY | [Int8](../../sql-reference/data-types/int-uint.md) | +| SHORT | [Int16](../../sql-reference/data-types/int-uint.md) | +| INT24 | [Int32](../../sql-reference/data-types/int-uint.md) | +| LONG | [UInt32](../../sql-reference/data-types/int-uint.md) | +| LONGLONG | [UInt64](../../sql-reference/data-types/int-uint.md) | +| FLOAT | [Float32](../../sql-reference/data-types/float.md) | +| DOUBLE | [Float64](../../sql-reference/data-types/float.md) | +| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) | +| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | +| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | +| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | +| STRING | [String](../../sql-reference/data-types/string.md) | +| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | +| BLOB | [String](../../sql-reference/data-types/string.md) | + +Другие типы не поддерживаются. Если таблица MySQL содержит столбец такого типа, ClickHouse выдаст исключение "необработанный тип данных" и остановит репликацию. + +Тип [Nullable](../../sql-reference/data-types/nullable.md) поддерживается. + +## Особенности и рекомендации {#specifics-and-recommendations} + +### DDL-запросы {#ddl-queries} + +DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может спарсить какой-либо DDL-запрос, то он игнорируется. + +### Репликация данных {#data-replication} + +Движок MaterializeMySQL не поддерживает прямые запросы `INSERT`, `DELETE` и `UPDATE`. Однако они поддерживаются с точки зрения репликации данных: + +- Запрос `INSERT` в MySQL конвертируется в `INSERT` с `_sign=1`. + +- Запрос `DELETE` в MySQL конвертируется в `INSERT` с `_sign=-1`. + +- Запрос `UPDATE` в MySQL конвертируется в `INSERT` с `_sign=-1` и `INSERT` с `_sign=1`. + +### Выборка из таблиц движка MaterializeMySQL {#select} + +Запрос `SELECT` из таблиц движка MaterializeMySQL имеет некоторую специфику: + +- Если `_version` не указан в запросе `SELECT`, то используется модификатор [FINAL](../../sql-reference/statements/select/from.md#select-from-final). Таким образом, выбираются только строки с `MAX(_version)`. + +- Если `_sign` не указан в запросе `SELECT`, то по умолчанию используется `WHERE _sign=1`. Таким образом, удаленные строки не включаются в результирующий набор. + +### Индекс конверсии {#index-conversion} + +Секции `PRIMARY KEY` и `INDEX` в MySQL конвертируются в кортежи `ORDER BY` в таблицах ClickHouse. + +ClickHouse имеет только один физический порядок, который определяется секцией `ORDER BY`. Чтобы создать новый физический порядок, используйте [материализованные представления](../../sql-reference/statements/create/view.md#materialized). + +**Примечание** + +- Строки с `_sign=-1` физически не удаляются из таблиц. +- Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`. +- Репликация может быть легко нарушена. +- Операции вручную с базами данных и таблицами запрещены. 
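Поведение виртуальных столбцов `_version` и `_sign`, описанное выше, можно проверить явно. Ниже приведён иллюстративный набросок (он не входит в патч): предполагается таблица `mysql.test` из примеров далее, а состав возвращаемых строк зависит от текущего состояния репликации.

``` sql
-- Набросок для иллюстрации: явное указание виртуальных столбцов отключает
-- неявный FINAL и фильтр `_sign = 1`, поэтому видны старые версии и удалённые строки.
SELECT a, b, _version, _sign FROM mysql.test ORDER BY _version;
```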
+ +## Примеры использования {#examples-of-use} + +Запросы в MySQL: + +``` sql +mysql> CREATE DATABASE db; +mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT); +mysql> INSERT INTO db.test VALUES (1, 11), (2, 22); +mysql> DELETE FROM db.test WHERE a=1; +mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16); +mysql> UPDATE db.test SET c='Wow!', b=222; +mysql> SELECT * FROM test; +``` + +```text ++---+------+------+ +| a | b | c | ++---+------+------+ +| 2 | 222 | Wow! | ++---+------+------+ +``` + +База данных в ClickHouse, обмен данными с сервером MySQL: + +База данных и созданная таблица: + +``` sql +CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***'); +SHOW TABLES FROM mysql; +``` + +``` text +┌─name─┐ +│ test │ +└──────┘ +``` + +После вставки данных: + +``` sql +SELECT * FROM mysql.test; +``` + +``` text +┌─a─┬──b─┐ +│ 1 │ 11 │ +│ 2 │ 22 │ +└───┴────┘ +``` + +После удаления данных, добавления столбца и обновления: + +``` sql +SELECT * FROM mysql.test; +``` + +``` text +┌─a─┬───b─┬─c────┐ +│ 2 │ 222 │ Wow! │ +└───┴─────┴──────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/) diff --git a/docs/ru/whats-new/roadmap.md b/docs/ru/whats-new/roadmap.md index 2c383b2ad5d..f80d210e19b 120000 --- a/docs/ru/whats-new/roadmap.md +++ b/docs/ru/whats-new/roadmap.md @@ -1 +1,10 @@ -../../en/whats-new/roadmap.md \ No newline at end of file +--- +toc_priority: 74 +toc_title: План +--- + +# План {#roadmap} + +План на 2021 год опубликован для открытого обсуждения [здесь](https://github.com/ClickHouse/ClickHouse/issues/17623). + +{## [Оригинальная статья](https://clickhouse.tech/docs/ru/roadmap/) ##} \ No newline at end of file From 8bafe9cca39e6da7b1198371c78f66fd55c94104 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Jan 2021 17:59:59 +0300 Subject: [PATCH 076/697] Support split for ActionsDAG. --- src/Interpreters/ActionsDAG.cpp | 206 ++++++++++++++++++++++++++++++++ src/Interpreters/ActionsDAG.h | 2 + 2 files changed, 208 insertions(+) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index fe0bba2bf3e..e145b7df13e 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -798,6 +798,212 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) return std::make_shared(std::move(first)); } +std::pair ActionsDAG::split(std::unordered_set split_nodes) const +{ + /// Split DAG into two parts. + /// (first_nodes, first_index) is a part which will have split_list in result. + /// (second_nodes, second_index) is a part which will have same index as current actions. + std::list second_nodes; + std::list first_nodes; + Index second_index; + Index first_index; + + /// List of nodes from current actions which are not inputs, but will be in second part. + std::vector new_inputs; + + struct Frame + { + const Node * node; + size_t next_child_to_visit = 0; + }; + + struct Data + { + bool needed_by_split_node = false; + bool visited = false; + bool used_in_result = false; + + /// Copies of node in one of the DAGs. + /// For COLUMN and INPUT both copies may exist. + Node * to_second = nullptr; + Node * to_first = nullptr; + }; + + std::stack stack; + std::unordered_map data; + + for (const auto & node : index) + data[node].used_in_result = true; + + /// DFS. Decide if node is needed by split. 
+ for (const auto & node : nodes) + { + if (split_nodes.count(&node) == 0) + continue; + + auto & cur_data = data[&node]; + if (cur_data.needed_by_split_node) + continue; + + cur_data.needed_by_split_node = true; + stack.push({.node = &node}); + + while (!stack.empty()) + { + auto & cur_node = stack.top().node; + stack.pop(); + + for (const auto * child : cur_node->children) + { + auto & child_data = data[child]; + if (!child_data.needed_by_split_node) + { + child_data.needed_by_split_node = true; + stack.push({.node = child}); + } + } + } + } + + /// DFS. Move nodes to one of the DAGs. + for (const auto & node : nodes) + { + if (!data[&node].visited) + stack.push({.node = &node}); + + while (!stack.empty()) + { + auto & cur = stack.top(); + auto & cur_data = data[cur.node]; + + /// At first, visit all children. + while (cur.next_child_to_visit < cur.node->children.size()) + { + auto * child = cur.node->children[cur.next_child_to_visit]; + auto & child_data = data[child]; + + if (!child_data.visited) + { + stack.push({.node = child}); + break; + } + + ++cur.next_child_to_visit; + } + + /// Make a copy part. + if (cur.next_child_to_visit == cur.node->children.size()) + { + cur_data.visited = true; + stack.pop(); + + if (!cur_data.needed_by_split_node) + { + auto & copy = second_nodes.emplace_back(*cur.node); + cur_data.to_second = © + + /// Replace children to newly created nodes. + for (auto & child : copy.children) + { + auto & child_data = data[child]; + + /// If children is not created, int may be from split part. + if (!child_data.to_second) + { + if (child->type == ActionType::COLUMN) /// Just create new node for COLUMN action. + { + child_data.to_second = &second_nodes.emplace_back(*child); + } + else + { + /// Node from first part is added as new input. + Node input_node; + input_node.type = ActionType::INPUT; + input_node.result_type = child->result_type; + input_node.result_name = child->result_name; + child_data.to_second = &second_nodes.emplace_back(std::move(input_node)); + + /// If it is already an input, it was created by other branch. + assert(child->type != ActionType::INPUT); + new_inputs.push_back(child); + } + } + + child = child_data.to_second; + } + + /// Every input should be in both DAGs. + if (copy.type == ActionType::INPUT) + { + auto & input_copy = first_nodes.emplace_back(*cur.node); + assert(cur_data.to_first == nullptr); + cur_data.to_first = &input_copy; + } + } + else + { + auto & copy = first_nodes.emplace_back(*cur.node); + cur_data.to_first = © + + /// Replace children to newly created nodes. + for (auto & child : copy.children) + { + child = data[child].to_first; + assert(child != nullptr); + } + + if (cur_data.used_in_result || copy.type == ActionType::INPUT) + { + /// If this node is needed in result, add it as input. 
+ Node input_node; + input_node.type = ActionType::INPUT; + input_node.result_type = node.result_type; + input_node.result_name = node.result_name; + cur_data.to_second = &second_nodes.emplace_back(std::move(input_node)); + + if (copy.type != ActionType::INPUT) + new_inputs.push_back(cur.node); + } + } + } + } + } + + for (auto * node : index) + second_index.insert(data[node].to_second); + + Inputs second_inputs; + Inputs first_inputs; + + for (auto * input : inputs) + { + const auto & cur = data[input]; + second_inputs.push_back(cur.to_second); + first_index.insert(cur.to_first); + + first_inputs.push_back(cur.to_first); + } + + for (const auto * input : new_inputs) + { + const auto & cur = data[input]; + second_inputs.push_back(cur.to_second); + first_index.insert(cur.to_first); + } + + auto first_actions = cloneEmpty(); + first_actions->nodes.swap(first_nodes); + first_actions->index.swap(first_index); + first_actions->inputs.swap(first_inputs); + + auto second_actions = cloneEmpty(); + second_actions->nodes.swap(second_nodes); + second_actions->index.swap(second_index); + second_actions->inputs.swap(second_inputs); + + return {std::move(first_actions), std::move(second_actions)}; +} + ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) { /// Split DAG into two parts. diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 6a26927374e..9ab00e14d2e 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -253,6 +253,8 @@ public: /// Otherwise, any two actions may be combined. static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second); + std::pair split(std::unordered_set split_nodes) const; + private: Node & addNode(Node node, bool can_replace = false); Node & getNode(const std::string & name); From 0d335144338408441f39e90e111d15e80bf89b5e Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 18 Jan 2021 18:24:17 +0300 Subject: [PATCH 077/697] Edit and translate to Russian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Внес поправки в английскую версию и выполнил перевод на русский язык. --- docs/en/operations/settings/settings.md | 6 +-- .../functions/string-functions.md | 2 +- .../mergetree-family/mergetree.md | 1 + docs/ru/operations/settings/settings.md | 10 +++++ .../functions/string-functions.md | 42 +++++++++++++++++++ .../sql-reference/statements/create/table.md | 12 +++++- 6 files changed, 67 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index d3a4d50d21c..c6e13451cef 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2530,12 +2530,12 @@ See examples in [UNION](../../sql-reference/statements/select/union.md). ## data_type_default_nullable {#data_type_default_nullable} -Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). +Allows data type without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). Possible values: -- 1 — The data types in column definitions are set to `Nullable` by default. -- 0 — The data types in column definitions are set to not `Nullable` by default. 
+- 1 — The data type in column definition is set to `Nullable` by default. +- 0 — The data type in column definition is set to not `Nullable` by default. Default value: `0`. diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 83f2705693a..2b93dd924a3 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -574,7 +574,7 @@ encodeXMLComponent(x) - `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). -**Returned value(s)** +**Returned value** - The sequence of characters with escape characters. diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index c7bd64c4ab1..9b2a5eafca3 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -88,6 +88,7 @@ ORDER BY expr - `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192. - `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes). - `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage). + - `max_parts_in_total` — максимальное количество кусков во всех партициях. - `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. - `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index aa549fc5776..9118af01b04 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2396,6 +2396,16 @@ WHERE 0 Смотрите примеры в разделе [UNION](../../sql-reference/statements/select/union.md). +## data_type_default_nullable {#data_type_default_nullable} + +Позволяет использовать по умолчанию тип данных [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) в определении столбца без явных модификаторов [NULL или NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers). + +Возможные значения: + +- 1 — тип данных в определении столбца задан по умолчанию как `Nullable`. +- 0 — тип данных в определении столбца не задан по умолчанию как `Nullable`. + +Значение по умолчанию: `0`. 
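+
+Примерный сценарий использования (иллюстративный набросок, имя таблицы условное):
+
+``` sql
+SET data_type_default_nullable = 1;
+CREATE TABLE nullable_by_default (x Int32) ENGINE = Memory;
+-- тип столбца `x` будет Nullable(Int32), так как модификатор NULL/NOT NULL не указан
+```
+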
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index cc488fb2d9c..cba5c1bc27f 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -555,4 +555,46 @@ SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 └─────┘ ``` +## encodeXMLComponent {#encode-xml-component} + +Экранирует символы для размещения строки в текстовом узле XML или атрибуте. + +Следующие пять встроенных XML-элементов будут заменены: `<`, `&`, `>`, `"`, `'`. + +**Синтаксис** + +``` sql +encodeXMLComponent(x) +``` + +**Параметры** + +- `x` — последовательность символов. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Последовательность символов, включая и экранируемые. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT encodeXMLComponent('Hello, "world"!'); +SELECT encodeXMLComponent('<123>'); +SELECT encodeXMLComponent('&clickhouse'); +SELECT encodeXMLComponent('\'foo\''); +``` + +Результат: + +``` text +Hello, "world"! +<123> +&clickhouse +'foo' +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index d54ec189a1a..eb93875d4ee 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -10,8 +10,8 @@ toc_title: "\u0422\u0430\u0431\u043b\u0438\u0446\u0430" ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec] [TTL expr2], + name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1], + name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec] [TTL expr2], ... ) ENGINE = engine ``` @@ -44,6 +44,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../../../engines/table-engines/index.md#table_engines). +## Модификаторы NULL или NOT NULL {#null-modifiers} + +Модификаторы `NULL` and `NOT NULL` после установления типа данных в определении столбца позволяют или не позволяют ему быть типом [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable). + +Если тип не `Nullable` и указан модификатор `NULL`, то столбец будет иметь тип `Nullable`; если `NOT NULL`, то не `Nullable`. Например, `INT NULL` то же, что и `Nullable(INT)`. Если тип `Nullable` и указаны модификаторы `NULL` или `NOT NULL`, то будет вызвано исключение. + +Смотрите также настройку [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable). 
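+
+Иллюстративный пример (имя таблицы условное):
+
+``` sql
+CREATE TABLE null_modifiers_example
+(
+    a INT NULL,
+    b INT NOT NULL
+) ENGINE = Memory;
+-- столбец `a` получит тип Nullable(Int32), а столбец `b` останется Int32
+```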
+ ### Значения по умолчанию {#create-default-values} В описании столбца, может быть указано выражение для значения по умолчанию, одного из следующих видов: From dcf1c1a07151ea845a556db485e6f54d51cc65d9 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 18 Jan 2021 18:36:14 +0300 Subject: [PATCH 078/697] Update roadmap.md and materialize-mysql.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Правлю ссылки. --- docs/en/engines/database-engines/materialize-mysql.md | 2 +- docs/ru/engines/database-engines/materialize-mysql.md | 2 +- docs/ru/whats-new/roadmap.md | 11 +---------- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index e1fc83cdab5..069fb9c55d3 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -13,7 +13,7 @@ ClickHouse server works as MySQL replica. It reads binlog and performs DDL and D ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]; +ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...] ``` **Engine Parameters** diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md index 24ddd2218c5..e899f453b5f 100644 --- a/docs/ru/engines/database-engines/materialize-mysql.md +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -13,7 +13,7 @@ toc_title: MaterializeMySQL ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]; +ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...] ``` **Параметры движка** diff --git a/docs/ru/whats-new/roadmap.md b/docs/ru/whats-new/roadmap.md index f80d210e19b..2c383b2ad5d 120000 --- a/docs/ru/whats-new/roadmap.md +++ b/docs/ru/whats-new/roadmap.md @@ -1,10 +1 @@ ---- -toc_priority: 74 -toc_title: План ---- - -# План {#roadmap} - -План на 2021 год опубликован для открытого обсуждения [здесь](https://github.com/ClickHouse/ClickHouse/issues/17623). - -{## [Оригинальная статья](https://clickhouse.tech/docs/ru/roadmap/) ##} \ No newline at end of file +../../en/whats-new/roadmap.md \ No newline at end of file From 7135c62204bfa8a83ebe9d80fd0654562d6cb886 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Jan 2021 19:12:16 +0300 Subject: [PATCH 079/697] Update tryLiftUpArrayJoin --- src/Interpreters/ActionsDAG.cpp | 164 ++++--------------------- src/Interpreters/ActionsDAG.h | 5 +- src/Processors/QueryPlan/QueryPlan.cpp | 18 +-- 3 files changed, 38 insertions(+), 149 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e145b7df13e..65241e1b5a3 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1004,183 +1004,69 @@ std::pair ActionsDAG::split(std::unordered_set ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const { - /// Split DAG into two parts. - /// (this_nodes, this_index) is a part which depends on ARRAY JOIN and stays here. - /// (split_nodes, split_index) is a part which will be moved before ARRAY JOIN. 
- std::list this_nodes; - std::list split_nodes; - Index this_index; - Index split_index; - Inputs new_inputs; struct Frame { - Node * node; + const Node * node; size_t next_child_to_visit = 0; }; - struct Data - { - bool depend_on_array_join = false; - bool visited = false; - bool used_in_result = false; - - /// Copies of node in one of the DAGs. - /// For COLUMN and INPUT both copies may exist. - Node * to_this = nullptr; - Node * to_split = nullptr; - }; + std::unordered_set split_nodes; + std::unordered_set visited_nodes; std::stack stack; - std::unordered_map data; - for (const auto & node : index) - data[node].used_in_result = true; - - /// DFS. Decide if node depends on ARRAY JOIN and move it to one of the DAGs. - for (auto & node : nodes) + /// DFS. Decide if node depends on ARRAY JOIN. + for (const auto & node : nodes) { - if (!data[&node].visited) - stack.push({.node = &node}); + if (visited_nodes.count(&node)) + continue; + + visited_nodes.insert(&node); + stack.push({.node = &node}); while (!stack.empty()) { auto & cur = stack.top(); - auto & cur_data = data[cur.node]; /// At first, visit all children. We depend on ARRAY JOIN if any child does. while (cur.next_child_to_visit < cur.node->children.size()) { auto * child = cur.node->children[cur.next_child_to_visit]; - auto & child_data = data[child]; - if (!child_data.visited) + if (visited_nodes.count(child) == 0) { + visited_nodes.insert(child); stack.push({.node = child}); break; } ++cur.next_child_to_visit; - if (child_data.depend_on_array_join) - cur_data.depend_on_array_join = true; } - /// Make a copy part. if (cur.next_child_to_visit == cur.node->children.size()) { + bool depend_on_array_join = false; if (cur.node->type == ActionType::INPUT && array_joined_columns.count(cur.node->result_name)) - cur_data.depend_on_array_join = true; + depend_on_array_join = true; + + for (const auto * child : cur.node->children) + { + if (split_nodes.count(child) == 0) + depend_on_array_join = true; + } + + if (!depend_on_array_join) + split_nodes.insert(cur.node); - cur_data.visited = true; stack.pop(); - - if (cur_data.depend_on_array_join) - { - auto & copy = this_nodes.emplace_back(*cur.node); - cur_data.to_this = © - - /// Replace children to newly created nodes. - for (auto & child : copy.children) - { - auto & child_data = data[child]; - - /// If children is not created, int may be from split part. - if (!child_data.to_this) - { - if (child->type == ActionType::COLUMN) /// Just create new node for COLUMN action. - { - child_data.to_this = &this_nodes.emplace_back(*child); - } - else - { - /// Node from split part is added as new input. - Node input_node; - input_node.type = ActionType::INPUT; - input_node.result_type = child->result_type; - input_node.result_name = child->result_name; // getUniqueNameForIndex(index, child->result_name); - child_data.to_this = &this_nodes.emplace_back(std::move(input_node)); - - if (child->type != ActionType::INPUT) - new_inputs.push_back(child_data.to_this); - - /// This node is needed for current action, so put it to index also. - split_index.replace(child_data.to_split); - } - } - - child = child_data.to_this; - } - } - else - { - auto & copy = split_nodes.emplace_back(*cur.node); - cur_data.to_split = © - - /// Replace children to newly created nodes. - for (auto & child : copy.children) - { - child = data[child].to_split; - assert(child != nullptr); - } - - if (cur_data.used_in_result) - { - split_index.replace(©); - - /// If this node is needed in result, add it as input. 
- Node input_node; - input_node.type = ActionType::INPUT; - input_node.result_type = node.result_type; - input_node.result_name = node.result_name; - cur_data.to_this = &this_nodes.emplace_back(std::move(input_node)); - - if (copy.type != ActionType::INPUT) - new_inputs.push_back(cur_data.to_this); - } - } } } } - for (auto * node : index) - this_index.insert(data[node].to_this); - - /// Consider actions are empty if all nodes are constants or inputs. - bool split_actions_are_empty = true; - for (const auto & node : split_nodes) - if (!node.children.empty()) - split_actions_are_empty = false; - - if (split_actions_are_empty) - return {}; - - Inputs this_inputs; - Inputs split_inputs; - - for (auto * input : inputs) - { - const auto & cur = data[input]; - if (cur.to_this) - this_inputs.push_back(cur.to_this); - if (cur.to_split) - split_inputs.push_back(cur.to_split); - } - - this_inputs.insert(this_inputs.end(), new_inputs.begin(), new_inputs.end()); - - index.swap(this_index); - nodes.swap(this_nodes); - inputs.swap(this_inputs); - - auto split_actions = cloneEmpty(); - split_actions->nodes.swap(split_nodes); - split_actions->index.swap(split_index); - split_actions->inputs.swap(split_inputs); - split_actions->settings.project_input = false; - - return split_actions; + return split(split_nodes); } } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 9ab00e14d2e..76ea3e30ee0 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -215,9 +215,8 @@ public: void projectInput() { settings.project_input = true; } void removeUnusedActions(const Names & required_names); - /// Splits actions into two parts. Returned half may be swapped with ARRAY JOIN. - /// Returns nullptr if no actions may be moved before ARRAY JOIN. - ActionsDAGPtr splitActionsBeforeArrayJoin(const NameSet & array_joined_columns); + /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN. + std::pair splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const; bool hasArrayJoin() const; bool hasStatefulFunctions() const; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 1b3ea16a213..6b5f5bc30b6 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -454,11 +454,11 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns); /// No actions can be moved before ARRAY JOIN. - if (!split_actions) + if (split_actions.first->empty()) return; /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin. 
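+ /// Lifting work above ARRAY JOIN pays off because ARRAY JOIN multiplies rows:
+ /// the lifted actions then run once per source row instead of once per unnested row.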
- if (expression->empty()) + if (split_actions.second->empty()) { auto expected_header = parent->getOutputStream().header; @@ -468,10 +468,10 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * if (expression_step) child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), - std::move(split_actions)); + std::move(split_actions.first)); else child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), - std::move(split_actions), + std::move(split_actions.first), filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); @@ -487,10 +487,14 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * /// Expression/Filter -> ArrayJoin -> node -> Something node.step = std::make_unique(node.children.at(0)->step->getOutputStream(), - std::move(split_actions)); + std::move(split_actions.first)); array_join_step->updateInputStream(node.step->getOutputStream(), {}); - expression_step ? expression_step->updateInputStream(array_join_step->getOutputStream(), true) - : filter_step->updateInputStream(array_join_step->getOutputStream(), true); + + if (expression_step) + parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second); + else + parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second, + filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); } static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node) From 31a40880b4ffc9f37aa1f52f5ddec347f8baa604 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Jan 2021 22:56:34 +0300 Subject: [PATCH 080/697] Update tryLiftUpArrayJoin --- src/Interpreters/ActionsDAG.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 65241e1b5a3..88aaa5ba7b7 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1066,7 +1066,10 @@ std::pair ActionsDAG::splitActionsBeforeArrayJoin } } - return split(split_nodes); + auto res = split(split_nodes); + /// Do not remove array joined columns if they are not used. + res.first->settings.project_input = false; + return res; } } From 60584362e73c4b401d8265a1d4fa1cfea0324a74 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 18 Jan 2021 22:57:13 +0300 Subject: [PATCH 081/697] Fixed typo --- docs/ru/operations/utilities/clickhouse-benchmark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 392ed859d58..4579418b63a 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -47,7 +47,7 @@ clickhouse-benchmark [keys] < queries_file; - `-p N`, `--port=N` — порт сервера. Значение по умолчанию: 9000. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. - `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: 0 (вечно будет повторяться). - `-r`, `--randomize` — случайный порядок выполнения запросов при наличии более одного входного запроса. -- `-s`, `--secure` — используется `TLS` соединения. +- `-s`, `--secure` — используется `TLS` соединение. - `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: 0 (лимит отключен). 
- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: 5. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. - `--cumulative` — выводит совокупность данных, а не данные за интервал. From 157e42a5b25023b8d60e008950050d16c27f89a4 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 18 Jan 2021 23:32:57 +0300 Subject: [PATCH 082/697] Fix the link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Исправил ссылку. --- docs/ru/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 97b72f79c86..d767ceb2b8d 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -495,7 +495,7 @@ ClickHouse поддерживает [NULL](../sql-reference/syntax.md), кото В этом формате один объект JSON интерпретируется как одно значение. Если входные данные имеют несколько объектов JSON, разделенных запятой, то они будут интерпретироваться как отдельные строки. -В этом формате парситься может только таблица с единственным полем типа [String](../sql-reference/data-types/string.md). Остальные столбцы должны быть заданы как [DEFAULT](../sql-reference/statements/create/table.md#create-default-values) или [MATERIALIZED](../sql-reference/statements/create/table.md#create-default-values), либо отсутствовать. Как только вы соберете весь объект JSON в строку, для его обработки вы можете использовать [функции для работы с JSON](../sql-reference/functions/json-functions.md). +В этом формате парситься может только таблица с единственным полем типа [String](../sql-reference/data-types/string.md). Остальные столбцы должны быть заданы как `DEFAULT` или `MATERIALIZED`(смотрите раздел [Значения по умолчанию](../sql-reference/statements/create/table.md#create-default-values)), либо отсутствовать. Как только вы соберете весь объект JSON в строку, для его обработки вы можете использовать [функции для работы с JSON](../sql-reference/functions/json-functions.md). **Пример** From d914bf9552fd9aa265097a49009ce70e8785c881 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Jan 2021 23:34:46 +0300 Subject: [PATCH 083/697] Update tryLiftUpArrayJoin --- src/Interpreters/ActionsDAG.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 88aaa5ba7b7..d005e070f7f 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -923,22 +923,12 @@ std::pair ActionsDAG::split(std::unordered_setresult_name; child_data.to_second = &second_nodes.emplace_back(std::move(input_node)); - /// If it is already an input, it was created by other branch. - assert(child->type != ActionType::INPUT); new_inputs.push_back(child); } } child = child_data.to_second; } - - /// Every input should be in both DAGs. 
- if (copy.type == ActionType::INPUT) - { - auto & input_copy = first_nodes.emplace_back(*cur.node); - assert(cur_data.to_first == nullptr); - cur_data.to_first = &input_copy; - } } else { @@ -952,7 +942,7 @@ std::pair ActionsDAG::split(std::unordered_set ActionsDAG::split(std::unordered_set ActionsDAG::split(std::unordered_set Date: Mon, 18 Jan 2021 23:52:33 +0300 Subject: [PATCH 084/697] Update tryLiftUpArrayJoin --- src/Interpreters/ActionsDAG.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index d005e070f7f..e601ef57c0c 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -929,6 +929,15 @@ std::pair ActionsDAG::split(std::unordered_set Date: Tue, 19 Jan 2021 00:54:01 +0300 Subject: [PATCH 085/697] Push actions result to begin of block. --- src/Interpreters/ActionsDAG.cpp | 6 +++--- src/Interpreters/ActionsDAG.h | 3 +++ src/Interpreters/ExpressionActions.cpp | 9 ++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e601ef57c0c..993986309ea 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -692,7 +692,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) { /// first: x (1), x (2), y ==> x (2), z, x (3) /// second: x (1), x (2), x (3) ==> x (3), x (2), x (1) - /// merge: x (1), x (2), x (3), y =(first)=> x (3), y, x (2), z, x (4) =(second)=> y, z, x (4), x (2), x (3) + /// merge: x (1), x (2), x (3), y =(first)=> x (2), z, x (4), x (3) =(second)=> x (3), x (4), x (2), z /// Will store merged result in `first`. @@ -775,8 +775,8 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) } } - for (auto * node : second.index) - first.index.insert(node); + for (auto it = second.index.rbegin(); it != second.index.rend(); ++it) + first.index.prepend(*it); } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 76ea3e30ee0..6b873eaaa26 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -106,6 +106,8 @@ public: std::list::iterator end() { return list.end(); } std::list::const_iterator begin() const { return list.begin(); } std::list::const_iterator end() const { return list.end(); } + std::list::const_reverse_iterator rbegin() const { return list.rbegin(); } + std::list::const_reverse_iterator rend() const { return list.rend(); } std::list::const_iterator find(std::string_view key) const { auto it = map.find(key); @@ -119,6 +121,7 @@ public: /// If node with the same name exists, it is removed from map, but not list. /// It is expected and used for project(), when result may have several columns with the same name. void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } + void prepend(Node * node) { map[node->result_name] = list.emplace(list.begin(), node); } /// If node with same name exists in index, replace it. Otherwise insert new node to index. 
void replace(Node * node) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index f0dcf830c82..3db0fbd833f 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -472,9 +472,16 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) block.erase(input); } + Block res; + for (auto pos : result_positions) if (execution_context.columns[pos].column) - block.insert(execution_context.columns[pos]); + res.insert(execution_context.columns[pos]); + + for (const auto & item : block) + res.insert(std::move(item)); + + block.swap(res); num_rows = execution_context.num_rows; } From 39cb72d1d49f59ce5cb9fb3ed99add88c58be93c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 19 Jan 2021 01:02:48 +0300 Subject: [PATCH 086/697] fix --- contrib/cassandra | 2 +- contrib/libuv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/cassandra b/contrib/cassandra index 2935f6f15fe..9cbc1a806df 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 2935f6f15fea889899750560aa6331e9119e9dd0 +Subproject commit 9cbc1a806df5d40fddbf84533b9873542c6513d8 diff --git a/contrib/libuv b/contrib/libuv index 84438304f41..e2e9b7e9f97 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28 +Subproject commit e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab From f7e61c1ed18fb1a88da0ab5157c48976c2cd62c0 Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 18 Jan 2021 17:03:31 -0500 Subject: [PATCH 087/697] Enabling all TestFlows modules. Increasing clickhouse container health check timeouts. --- .../docker-compose/clickhouse-service.yml | 8 +- .../docker-compose/clickhouse-service.yml | 2 +- .../docker-compose/clickhouse-service.yml | 2 +- .../ldap/authentication/regression.py | 6 +- .../authentication/tests/authentications.py | 4 +- .../docker-compose/clickhouse-service.yml | 2 +- .../external_user_directory/tests/common.py | 5 +- tests/testflows/ldap/regression.py | 3 +- tests/testflows/ldap/role_mapping/__init__.py | 0 .../ldap/role_mapping/configs/CA/ca.crt | 22 + .../ldap/role_mapping/configs/CA/ca.key | 30 + .../ldap/role_mapping/configs/CA/ca.srl | 1 + .../ldap/role_mapping/configs/CA/dhparam.pem | 8 + .../role_mapping/configs/CA/passphrase.txt | 1 + .../configs/clickhouse/common.xml | 6 + .../configs/clickhouse/config.d/logs.xml | 17 + .../configs/clickhouse/config.d/ports.xml | 5 + .../configs/clickhouse/config.d/remote.xml | 107 ++ .../configs/clickhouse/config.d/ssl.xml | 17 + .../configs/clickhouse/config.d/storage.xml | 20 + .../configs/clickhouse/config.d/zookeeper.xml | 10 + .../configs/clickhouse/config.xml | 442 +++++ .../configs/clickhouse/ssl/dhparam.pem | 8 + .../configs/clickhouse/ssl/server.crt | 19 + .../configs/clickhouse/ssl/server.key | 28 + .../role_mapping/configs/clickhouse/users.xml | 133 ++ .../configs/clickhouse1/config.d/macros.xml | 8 + .../configs/clickhouse2/config.d/macros.xml | 8 + .../configs/clickhouse3/config.d/macros.xml | 8 + .../configs/ldap1/config/export.ldif | 64 + .../role_mapping/configs/ldap2/certs/ca.crt | 22 + .../configs/ldap2/certs/dhparam.pem | 5 + .../role_mapping/configs/ldap2/certs/ldap.crt | 20 + .../role_mapping/configs/ldap2/certs/ldap.csr | 17 + .../role_mapping/configs/ldap2/certs/ldap.key | 27 + .../configs/ldap2/config/export.ldif | 64 + .../role_mapping/configs/ldap3/certs/ca.crt | 22 + .../configs/ldap3/certs/dhparam.pem | 5 + 
.../role_mapping/configs/ldap3/certs/ldap.crt | 20 + .../role_mapping/configs/ldap3/certs/ldap.csr | 17 + .../role_mapping/configs/ldap3/certs/ldap.key | 27 + .../configs/ldap3/config/export.ldif | 64 + .../role_mapping/configs/ldap4/certs/ca.crt | 22 + .../configs/ldap4/certs/dhparam.pem | 5 + .../role_mapping/configs/ldap4/certs/ldap.crt | 20 + .../role_mapping/configs/ldap4/certs/ldap.csr | 17 + .../role_mapping/configs/ldap4/certs/ldap.key | 27 + .../configs/ldap4/config/export.ldif | 64 + .../configs/ldap5/config/export.ldif | 64 + .../configs/ldap5/ldap2/certs/ca.crt | 22 + .../configs/ldap5/ldap2/certs/dhparam.pem | 5 + .../configs/ldap5/ldap2/certs/ldap.crt | 20 + .../configs/ldap5/ldap2/certs/ldap.csr | 17 + .../configs/ldap5/ldap2/certs/ldap.key | 27 + .../configs/ldap5/ldap2/config/export.ldif | 64 + .../docker-compose/clickhouse-service.yml | 28 + .../docker-compose/docker-compose.yml | 162 ++ .../docker-compose/openldap-service.yml | 40 + .../docker-compose/zookeeper-service.yml | 18 + .../testflows/ldap/role_mapping/regression.py | 47 + .../role_mapping/requirements/__init__.py | 1 + .../role_mapping/requirements/requirements.py | 1475 +++++++++++++++++ .../ldap/role_mapping/tests/common.py | 252 +++ .../ldap/role_mapping/tests/mapping.py | 1372 +++++++++++++++ .../ldap/role_mapping/tests/server_config.py | 78 + .../docker-compose/clickhouse-service.yml | 2 +- tests/testflows/regression.py | 9 +- 67 files changed, 5140 insertions(+), 22 deletions(-) create mode 100644 tests/testflows/ldap/role_mapping/__init__.py create mode 100644 tests/testflows/ldap/role_mapping/configs/CA/ca.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/CA/ca.key create mode 100644 tests/testflows/ldap/role_mapping/configs/CA/ca.srl create mode 100644 tests/testflows/ldap/role_mapping/configs/CA/dhparam.pem create mode 100644 tests/testflows/ldap/role_mapping/configs/CA/passphrase.txt create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/logs.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ports.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/remote.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ssl.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/storage.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/zookeeper.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/dhparam.pem create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.key create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse1/config.d/macros.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse2/config.d/macros.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/clickhouse3/config.d/macros.xml create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap1/config/export.ldif create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap2/certs/ca.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap2/certs/dhparam.pem create mode 100644 
tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.csr create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.key create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap2/config/export.ldif create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap3/certs/ca.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap3/certs/dhparam.pem create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.csr create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.key create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap3/config/export.ldif create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap4/certs/ca.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap4/certs/dhparam.pem create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.csr create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.key create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap4/config/export.ldif create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/config/export.ldif create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ca.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/dhparam.pem create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.crt create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.csr create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.key create mode 100644 tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/config/export.ldif create mode 100644 tests/testflows/ldap/role_mapping/docker-compose/clickhouse-service.yml create mode 100644 tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml create mode 100644 tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml create mode 100644 tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml create mode 100755 tests/testflows/ldap/role_mapping/regression.py create mode 100644 tests/testflows/ldap/role_mapping/requirements/__init__.py create mode 100644 tests/testflows/ldap/role_mapping/requirements/requirements.py create mode 100644 tests/testflows/ldap/role_mapping/tests/common.py create mode 100644 tests/testflows/ldap/role_mapping/tests/mapping.py create mode 100644 tests/testflows/ldap/role_mapping/tests/server_config.py diff --git a/tests/testflows/aes_encryption/docker-compose/clickhouse-service.yml b/tests/testflows/aes_encryption/docker-compose/clickhouse-service.yml index 9787b37abbb..0789decf022 100644 --- a/tests/testflows/aes_encryption/docker-compose/clickhouse-service.yml +++ b/tests/testflows/aes_encryption/docker-compose/clickhouse-service.yml @@ -18,10 +18,10 @@ services: entrypoint: bash -c "clickhouse server --config-file=/etc/clickhouse-server/config.xml --log-file=/var/log/clickhouse-server/clickhouse-server.log --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" healthcheck: test: clickhouse client --query='select 1' - interval: 3s - timeout: 2s - retries: 40 - start_period: 2s + interval: 10s + timeout: 10s + retries: 10 + start_period: 300s cap_add: - SYS_PTRACE security_opt: diff --git 
a/tests/testflows/example/docker-compose/clickhouse-service.yml b/tests/testflows/example/docker-compose/clickhouse-service.yml index 2a56876c72e..0789decf022 100644 --- a/tests/testflows/example/docker-compose/clickhouse-service.yml +++ b/tests/testflows/example/docker-compose/clickhouse-service.yml @@ -20,7 +20,7 @@ services: test: clickhouse client --query='select 1' interval: 10s timeout: 10s - retries: 3 + retries: 10 start_period: 300s cap_add: - SYS_PTRACE diff --git a/tests/testflows/ldap/authentication/docker-compose/clickhouse-service.yml b/tests/testflows/ldap/authentication/docker-compose/clickhouse-service.yml index 2a56876c72e..0789decf022 100644 --- a/tests/testflows/ldap/authentication/docker-compose/clickhouse-service.yml +++ b/tests/testflows/ldap/authentication/docker-compose/clickhouse-service.yml @@ -20,7 +20,7 @@ services: test: clickhouse client --query='select 1' interval: 10s timeout: 10s - retries: 3 + retries: 10 start_period: 300s cap_add: - SYS_PTRACE diff --git a/tests/testflows/ldap/authentication/regression.py b/tests/testflows/ldap/authentication/regression.py index 50677d78cb8..ff004a998ca 100755 --- a/tests/testflows/ldap/authentication/regression.py +++ b/tests/testflows/ldap/authentication/regression.py @@ -23,11 +23,7 @@ xfails = { "connection protocols/starttls with custom port": [(Fail, "it seems that starttls is not enabled by default on custom plain-text ports in LDAP server")], "connection protocols/tls cipher suite": - [(Fail, "can't get it to work")], - "external user directory/user authentications/valid verification cooldown value ldap unavailable": - [(Fail, "flaky, ask Vitaly Zakaznikov, Telegram @vzakaznikov")], - "user authentications/rbac=True/verification cooldown/verification cooldown performance": - [(Fail, "flaky, ask Vitaly Zakaznikov, Telegram @vzakaznikov")] + [(Fail, "can't get it to work")] } @TestFeature diff --git a/tests/testflows/ldap/authentication/tests/authentications.py b/tests/testflows/ldap/authentication/tests/authentications.py index 46bcae000b8..b54cc880bbc 100644 --- a/tests/testflows/ldap/authentication/tests/authentications.py +++ b/tests/testflows/ldap/authentication/tests/authentications.py @@ -131,7 +131,7 @@ def login_after_user_is_deleted_from_ldap(self, server, rbac=False): user = add_user_to_ldap(**user) with ldap_authenticated_users({"username": user["cn"], "server": server}, config_file=f"ldap_users_{getuid()}.xml", - restart=True, rbac=rbac): + restart=True, rbac=rbac): login_and_execute_query(username=user["cn"], password=user["userpassword"]) with When("I delete this user from LDAP"): @@ -202,7 +202,7 @@ def login_after_user_cn_changed_in_ldap(self, server, rbac=False): user = add_user_to_ldap(**user) with ldap_authenticated_users({"username": user["cn"], "server": server}, - config_file=f"ldap_users_{getuid()}.xml", restart=True, rbac=rbac): + config_file=f"ldap_users_{getuid()}.xml", restart=True, rbac=rbac): login_and_execute_query(username=user["cn"], password=user["userpassword"]) with When("I change user password in LDAP"): diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/clickhouse-service.yml b/tests/testflows/ldap/external_user_directory/docker-compose/clickhouse-service.yml index 2a56876c72e..0789decf022 100644 --- a/tests/testflows/ldap/external_user_directory/docker-compose/clickhouse-service.yml +++ b/tests/testflows/ldap/external_user_directory/docker-compose/clickhouse-service.yml @@ -20,7 +20,7 @@ services: test: clickhouse client --query='select 1' interval: 10s 
timeout: 10s - retries: 3 + retries: 10 start_period: 300s cap_add: - SYS_PTRACE diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index e1ee4f99545..e5980640721 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -96,7 +96,10 @@ def create_entries_ldap_external_user_directory_config_content(entries, config_d my_ldap_server - my_user + + + + ``` diff --git a/tests/testflows/ldap/regression.py b/tests/testflows/ldap/regression.py index 9cc9aa85f93..579223c4b35 100755 --- a/tests/testflows/ldap/regression.py +++ b/tests/testflows/ldap/regression.py @@ -16,6 +16,7 @@ def regression(self, local, clickhouse_binary_path, parallel=None, stress=None): Feature(test=load("ldap.authentication.regression", "regression"))(**args) Feature(test=load("ldap.external_user_directory.regression", "regression"))(**args) + Feature(test=load("ldap.role_mapping.regression", "regression"))(**args) if main(): - regression() \ No newline at end of file + regression() diff --git a/tests/testflows/ldap/role_mapping/__init__.py b/tests/testflows/ldap/role_mapping/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/testflows/ldap/role_mapping/configs/CA/ca.crt b/tests/testflows/ldap/role_mapping/configs/CA/ca.crt new file mode 100644 index 00000000000..8c71e3afc91 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/CA/ca.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDlTCCAn2gAwIBAgIUJBqw2dHM2DDCZjYSkPOESlvDH6swDQYJKoZIhvcNAQEL +BQAwWjELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQHDAZPdHRhd2Ex +ETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTENMAsGA1UEAwwEcm9vdDAe +Fw0yMDA2MTExOTAzNDhaFw0zMDA2MDkxOTAzNDhaMFoxCzAJBgNVBAYTAkNBMQsw +CQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYDVQQKDAhBbHRpbml0eTEL +MAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC9Irr0zGV+HCI2fZ0ht4hR5It4Sbjz4RwZV8ENRP/+TEz8l9eK +J6ygxhKX7SMYzIs/jS9Gsq4plX1r2ujW1qRf8yLpR4+dGLP+jBRi1drj0XjZXosT +SERjWzgPauWxL9LN8+l26eBAqz6fw5e0W8WRSTgf5iGiCcKOTmaATIUjP0CdfWKK +qpktI4vhe++CXZFJ3usR+8KZ/FwwbCLJM/3J2HnbcXfcaYPYvr1tfqLudKSTbG9H +M3+AVwjctdesc/0sbd51Zsm0ClQptMbuKnDCYauGg61kNkgbgPgRmH9Pzo67DtxF +/WW+PtOzq8xLOifciQ9Piboy9QBSQZGwf4wzAgMBAAGjUzBRMB0GA1UdDgQWBBSi +njya0RDozx3OZTLYFpwqYnlpIDAfBgNVHSMEGDAWgBSinjya0RDozx3OZTLYFpwq +YnlpIDAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBAD7VyFg7F +U1C25KFvtauchAOjCW6w7U/b3z1dVZvcQ88/kH1VsLUcfGixlSilUEfPTJsi7OA0 +R5BQdh2GGcjUJv4iqEFGU05KvMVmRRKn08P62+ZhJxKMxG26VzcliRZzCMkI6d0W +lFwI6nM45yeqdHVh5k4xbuJzqpbD9BtXXLI+/Ra9Fx8S9ETA3GdidpZLU5P1VLxq +UuedfqyAVWZXpr6TAURGxouRmRzul9yFzbSUex+MLEIPrstjtEwV3+tBQZJz9xAS +TVPj+Nv3LO7GCq54bdwkq1ioWbSL2hEmABkj6kdW/JwmfhGHf/2rirDVMzrTYw07 +dFJfAZC+FEsv +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/CA/ca.key b/tests/testflows/ldap/role_mapping/configs/CA/ca.key new file mode 100644 index 00000000000..e7a7f664dcf --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/CA/ca.key @@ -0,0 +1,30 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,D06B9754A2069EBB4E77065DC9B605A1 + +FJT794Z6AUuUB5Vp5W2iR6zzCvQUg2dtKoE+xhFdbgC7lmSfA2W/O9fx15Il67Yj +Bbpm9Y6yteUSDQpJrvBdkhXeBkYEa5K1CA+0Jdx98nzwP3KBhHNxVVrTWRc5kniB +LMV3iBQEbAafxgL7gN+EWr3eV7w7ZSqT7D5br/mlBALU62gv2UzwTXLu1CgyNWMC +HIPjIX50Zga+BnhZhtQvM4Yj1gOsn+X6AaEZ3KjTfCDqthYQf2ldswW4gAlPAq83 
++INq9Spx+QG97Z+1XO2DmmGTZL0z+OFLT+3y26/UcftM26ODY09Dcf3gt0n6RIUV +0KsD1qQL0ppu4CHVnbIkOKMBe86qBl+kG8FVmyhgZ8D9ULlF1tpyTVKvHR82V2B5 +ztbc5EY1Fhb+r7OVVJlbCeo/bWmWybODZrpN49x5gGZpM3+8ApaHupGZ+cRFkQKG +rDpqC5gflT3WwFNxtP5noWcV+Gzb3riXNM3c8G5aIpLZwmmaTLK9ahKqMcq4Ljf+ +hir8kuCMqIKt3m7Ceoj4wAHSP8xO0y/cc1WYNb3CI0Emk795aR6IPUw4vDEXHG27 +OLoCJTvl/JKRWJGkdQx8wKAs/uw/qwtbhYoQJccTjfvy4NXH3tpSgxCE8OTWuEch +TAN8ra1PDGAUu+1MeT5gZ9uI1BEU6hXMME4mVRpJdcmw9MVy3V+B6rkUqX3kFAfR +e2ueF5qgIp+A4/UlVe5cKdWAQxu4BnUESLooA7cbgcLypdao9bRx9bXH8S3aNgxW +IdgICpc/v8wAX2yqMe191KgR9Vh1p0RCw/kEGVgWfY/IaQpsaYuq5quZbvr/fN5T +d++ySAMaPysaCadLUdZJLw56uk4Y+PYzR+ygjTX9dCCHedrAU8RYM55FJ/fyD3bQ +Hn9/n7PZyWy6u/TYt6dhlcYxaS3Opzw4eAQB8tGZJRYQ3AKpHpTEC57lXoMnUPKo ++nBmb0+YulylMZdns0WIBJlcv6qzIaNhDMrjyi18n1ezzPIGH7ivUjoXy2FL23q5 +f3aqJK4UUDEDkC8IeZkS+ykYxnohjFDhUyBe5gjryLqdMdy9EerehCWPf425AztX +c/EWPzDl46qmxWhugOlz3Fiw95VlYu0MUDRayHuZiYPplgJypChuU4EHJ+q8V2z3 +BwjSo1bD4nfc8f68qEOtdZ1u/ClcolMwlZQYDJz/DiE4JOcd2Gx4QSF5vaInm0/4 +mMj/ZWna4DAYFbH8IGh7xUPDqeIWhBYlgrD69ajKyay5Vu3La/d2QW20BhX35Ro2 +ZJVR+lfioMmxn4y481H2pv+5gOlGwh02Oa8qLhZBb8W+DvFShNk6mk87eCForFFT +CDgmvfsC/cS2wZkcFTecq6vbjFlt+OF13NCKlcO3wCm44D+bwVPeMrU6HycCVQw7 +SASrnP/th5sJbv11byb2lKgVdVHWk090bqnDwB9H2hGIb9JnPC9ZpaL/mocYyzTi +H9fcBrMYkL09FJGr3Uff7qEY4XQTMlLadXue3iKd19PRgV8cRyKp37MYI9/3iLwv +eYHLtMfrifZahf1ksOPeBphnlfzWo9qqfooUCaGxfSlNPUHhrHZ4aMiRyTE8Xeh2 +-----END RSA PRIVATE KEY----- diff --git a/tests/testflows/ldap/role_mapping/configs/CA/ca.srl b/tests/testflows/ldap/role_mapping/configs/CA/ca.srl new file mode 100644 index 00000000000..66feb9c8a35 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/CA/ca.srl @@ -0,0 +1 @@ +227B125D27B6B1A4B5955361365DF8EC2D7098C1 diff --git a/tests/testflows/ldap/role_mapping/configs/CA/dhparam.pem b/tests/testflows/ldap/role_mapping/configs/CA/dhparam.pem new file mode 100644 index 00000000000..554d75696ee --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/CA/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEA1iatTn4jdw1WIu09qeLj8OEeLhzG/w2lI4RUeJT9nU+WTwegpvLN +/MvrIMIKHRmItyxgraYFau2moC7RKm7OKLmFt6e34QeMvM1vXpuwQav6mfp8GsYL +mEIw5riFcB73E32NN3g7qmfmurkTF28BohmqhuQp2et7FNoGBKQ6ePZzGHWil3yG +nEnCwyK0o3eP2IEytx2N50uUWVdfg3MN34L3wqpUivArrjBkoMpqm3/V3wdfoYG9 +ZQkH0gIxT/2FIixCLGlfBsJ1qA/Apz1BJZbGqVu5M5iiQmq+LWN5JLS3xYai4wJL +rIY8DhjbciSNVWkwTJHzaLwIQa9a6p6mUwIBAg== +-----END DH PARAMETERS----- diff --git a/tests/testflows/ldap/role_mapping/configs/CA/passphrase.txt b/tests/testflows/ldap/role_mapping/configs/CA/passphrase.txt new file mode 100644 index 00000000000..2cf58b2364c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/CA/passphrase.txt @@ -0,0 +1 @@ +altinity diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml new file mode 100644 index 00000000000..df952b28c82 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml @@ -0,0 +1,6 @@ + + Europe/Moscow + 0.0.0.0 + /var/lib/clickhouse/ + /var/lib/clickhouse/tmp/ + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/logs.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/logs.xml new file mode 100644 index 00000000000..bdf1bbc11c1 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/logs.xml @@ -0,0 +1,17 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + 
/var/log/clickhouse-server/stdout.log + + + system + part_log
+ 500 +
+
diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ports.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ports.xml new file mode 100644 index 00000000000..fbc6cea74c0 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ports.xml @@ -0,0 +1,5 @@ + + + 8443 + 9440 + \ No newline at end of file diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/remote.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/remote.xml new file mode 100644 index 00000000000..51be2a6e8e3 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/remote.xml @@ -0,0 +1,107 @@ + + + + + + true + + clickhouse1 + 9000 + + + clickhouse2 + 9000 + + + clickhouse3 + 9000 + + + + + + + true + + clickhouse1 + 9440 + 1 + + + clickhouse2 + 9440 + 1 + + + clickhouse3 + 9440 + 1 + + + + + + + clickhouse1 + 9000 + + + + + clickhouse2 + 9000 + + + + + clickhouse3 + 9000 + + + + + + + clickhouse1 + 9440 + 1 + + + + + clickhouse2 + 9440 + 1 + + + + + clickhouse3 + 9440 + 1 + + + + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ssl.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ssl.xml new file mode 100644 index 00000000000..ca65ffd5e04 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/ssl.xml @@ -0,0 +1,17 @@ + + + + /etc/clickhouse-server/ssl/server.crt + /etc/clickhouse-server/ssl/server.key + none + true + + + true + none + + AcceptCertificateHandler + + + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/storage.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/storage.xml new file mode 100644 index 00000000000..618fd6b6d24 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/storage.xml @@ -0,0 +1,20 @@ + + + + + + 1024 + + + + + + + default + + + + + + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/zookeeper.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/zookeeper.xml new file mode 100644 index 00000000000..96270e7b645 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.d/zookeeper.xml @@ -0,0 +1,10 @@ + + + + + zookeeper + 2181 + + 15000 + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml new file mode 100644 index 00000000000..e28a0c8e255 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml @@ -0,0 +1,442 @@ + + + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + + 8123 + 9000 + + + + + + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + /etc/clickhouse-server/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + + 9009 + + + + + + + + + + + + + + + + + + + + 4096 + 3 + + + 100 + + + + + + 8589934592 + + + 5368709120 + + + + /var/lib/clickhouse/ + + + /var/lib/clickhouse/tmp/ + + + /var/lib/clickhouse/user_files/ + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + default + + + + + + default + + + + + + + + + false + + + + + + + + localhost + 9000 + + + + + + + localhost + 9000 + + + + + localhost + 9000 + + + + + + + localhost + 9440 + 1 + + + + + + + localhost + 9000 + + + + + localhost + 1 + + + + + + + + + + + + + 
+ + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + system + query_log
+ + toYYYYMM(event_date) + + 7500 +
+ + + + system + trace_log
+ + toYYYYMM(event_date) + 7500 +
+ + + + system + query_thread_log
+ toYYYYMM(event_date) + 7500 +
+ + + + + + + + + + + + + + + + *_dictionary.xml + + + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + +
diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/dhparam.pem b/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/dhparam.pem new file mode 100644 index 00000000000..2e6cee0798d --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR +XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5 +Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb +9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf +slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb +dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg== +-----END DH PARAMETERS----- diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.crt b/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.crt new file mode 100644 index 00000000000..7ade2d96273 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIC/TCCAeWgAwIBAgIJANjx1QSR77HBMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAgFw0xODA3MzAxODE2MDhaGA8yMjkyMDUxNDE4MTYwOFow +FDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEAs9uSo6lJG8o8pw0fbVGVu0tPOljSWcVSXH9uiJBwlZLQnhN4SFSFohfI +4K8U1tBDTnxPLUo/V1K9yzoLiRDGMkwVj6+4+hE2udS2ePTQv5oaMeJ9wrs+5c9T +4pOtlq3pLAdm04ZMB1nbrEysceVudHRkQbGHzHp6VG29Fw7Ga6YpqyHQihRmEkTU +7UCYNA+Vk7aDPdMS/khweyTpXYZimaK9f0ECU3/VOeG3fH6Sp2X6FN4tUj/aFXEj +sRmU5G2TlYiSIUMF2JPdhSihfk1hJVALrHPTU38SOL+GyyBRWdNcrIwVwbpvsvPg +pryMSNxnpr0AK0dFhjwnupIv5hJIOQIDAQABo1AwTjAdBgNVHQ4EFgQUjPLb3uYC +kcamyZHK4/EV8jAP0wQwHwYDVR0jBBgwFoAUjPLb3uYCkcamyZHK4/EV8jAP0wQw +DAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAM/ocuDvfPus/KpMVD51j +4IdlU8R0vmnYLQ+ygzOAo7+hUWP5j0yvq4ILWNmQX6HNvUggCgFv9bjwDFhb/5Vr +85ieWfTd9+LTjrOzTw4avdGwpX9G+6jJJSSq15tw5ElOIFb/qNA9O4dBiu8vn03C +L/zRSXrARhSqTW5w/tZkUcSTT+M5h28+Lgn9ysx4Ff5vi44LJ1NnrbJbEAIYsAAD ++UA+4MBFKx1r6hHINULev8+lCfkpwIaeS8RL+op4fr6kQPxnULw8wT8gkuc8I4+L +P9gg/xDHB44T3ADGZ5Ib6O0DJaNiToO6rnoaaxs0KkotbvDWvRoxEytSbXKoYjYp +0g== +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.key b/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.key new file mode 100644 index 00000000000..f0fb61ac443 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/ssl/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCz25KjqUkbyjyn +DR9tUZW7S086WNJZxVJcf26IkHCVktCeE3hIVIWiF8jgrxTW0ENOfE8tSj9XUr3L +OguJEMYyTBWPr7j6ETa51LZ49NC/mhox4n3Cuz7lz1Pik62WreksB2bThkwHWdus +TKxx5W50dGRBsYfMenpUbb0XDsZrpimrIdCKFGYSRNTtQJg0D5WTtoM90xL+SHB7 +JOldhmKZor1/QQJTf9U54bd8fpKnZfoU3i1SP9oVcSOxGZTkbZOViJIhQwXYk92F +KKF+TWElUAusc9NTfxI4v4bLIFFZ01ysjBXBum+y8+CmvIxI3GemvQArR0WGPCe6 +ki/mEkg5AgMBAAECggEATrbIBIxwDJOD2/BoUqWkDCY3dGevF8697vFuZKIiQ7PP +TX9j4vPq0DfsmDjHvAPFkTHiTQXzlroFik3LAp+uvhCCVzImmHq0IrwvZ9xtB43f +7Pkc5P6h1l3Ybo8HJ6zRIY3TuLtLxuPSuiOMTQSGRL0zq3SQ5DKuGwkz+kVjHXUN +MR2TECFwMHKQ5VLrC+7PMpsJYyOMlDAWhRfUalxC55xOXTpaN8TxNnwQ8K2ISVY5 +212Jz/a4hn4LdwxSz3Tiu95PN072K87HLWx3EdT6vW4Ge5P/A3y+smIuNAlanMnu +plHBRtpATLiTxZt/n6npyrfQVbYjSH7KWhB8hBHtaQKBgQDh9Cq1c/KtqDtE0Ccr +/r9tZNTUwBE6VP+3OJeKdEdtsfuxjOCkS1oAjgBJiSDOiWPh1DdoDeVZjPKq6pIu +Mq12OE3Doa8znfCXGbkSzEKOb2unKZMJxzrz99kXt40W5DtrqKPNb24CNqTiY8Aa +CjtcX+3weat82VRXvph6U8ltMwKBgQDLxjiQQzNoY7qvg7CwJCjf9qq8jmLK766g 
+1FHXopqS+dTxDLM8eJSRrpmxGWJvNeNc1uPhsKsKgotqAMdBUQTf7rSTbt4MyoH5 +bUcRLtr+0QTK9hDWMOOvleqNXha68vATkohWYfCueNsC60qD44o8RZAS6UNy3ENq +cM1cxqe84wKBgQDKkHutWnooJtajlTxY27O/nZKT/HA1bDgniMuKaz4R4Gr1PIez +on3YW3V0d0P7BP6PWRIm7bY79vkiMtLEKdiKUGWeyZdo3eHvhDb/3DCawtau8L2K +GZsHVp2//mS1Lfz7Qh8/L/NedqCQ+L4iWiPnZ3THjjwn3CoZ05ucpvrAMwKBgB54 +nay039MUVq44Owub3KDg+dcIU62U+cAC/9oG7qZbxYPmKkc4oL7IJSNecGHA5SbU +2268RFdl/gLz6tfRjbEOuOHzCjFPdvAdbysanpTMHLNc6FefJ+zxtgk9sJh0C4Jh +vxFrw9nTKKzfEl12gQ1SOaEaUIO0fEBGbe8ZpauRAoGAMAlGV+2/K4ebvAJKOVTa +dKAzQ+TD2SJmeR1HZmKDYddNqwtZlzg3v4ZhCk4eaUmGeC1Bdh8MDuB3QQvXz4Dr +vOIP4UVaOr+uM+7TgAgVnP4/K6IeJGzUDhX93pmpWhODfdu/oojEKVcpCojmEmS1 +KCBtmIrQLqzMpnBpLNuSY+Q= +-----END PRIVATE KEY----- diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml new file mode 100644 index 00000000000..86b2cd9e1e3 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml @@ -0,0 +1,133 @@ + + + + + + + + 10000000000 + + + 0 + + + random + + + + + 1 + + + + + + + + + + + + + ::/0 + + + + default + + + default + + + 1 + + + + + + + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse1/config.d/macros.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse1/config.d/macros.xml new file mode 100644 index 00000000000..6cdcc1b440c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse1/config.d/macros.xml @@ -0,0 +1,8 @@ + + + + clickhouse1 + 01 + 01 + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse2/config.d/macros.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse2/config.d/macros.xml new file mode 100644 index 00000000000..a114a9ce4ab --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse2/config.d/macros.xml @@ -0,0 +1,8 @@ + + + + clickhouse2 + 01 + 02 + + diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse3/config.d/macros.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse3/config.d/macros.xml new file mode 100644 index 00000000000..904a27b0172 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/clickhouse3/config.d/macros.xml @@ -0,0 +1,8 @@ + + + + clickhouse3 + 01 + 03 + + diff --git a/tests/testflows/ldap/role_mapping/configs/ldap1/config/export.ldif b/tests/testflows/ldap/role_mapping/configs/ldap1/config/export.ldif new file mode 100644 index 00000000000..621dd32ca0c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap1/config/export.ldif @@ -0,0 +1,64 @@ +# LDIF Export for dc=company,dc=com +# Server: openldap (openldap) +# Search Scope: sub +# Search Filter: (objectClass=*) +# Total Entries: 7 +# +# Generated by phpLDAPadmin (http://phpldapadmin.sourceforge.net) on May 22, 2020 5:51 pm +# Version: 1.2.5 + +# Entry 1: dc=company,dc=com +#dn: dc=company,dc=com +#dc: company +#o: company +#objectclass: top +#objectclass: dcObject +#objectclass: organization + +# Entry 2: cn=admin,dc=company,dc=com +#dn: cn=admin,dc=company,dc=com +#cn: admin +#description: LDAP administrator +#objectclass: simpleSecurityObject +#objectclass: organizationalRole +#userpassword: {SSHA}eUEupkQCTvq9SkrxfWGSe5rX+orrjVbF + +# Entry 3: ou=groups,dc=company,dc=com +dn: ou=groups,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: groups + +# Entry 4: cn=admin,ou=groups,dc=company,dc=com +dn: cn=admin,ou=groups,dc=company,dc=com +cn: admin +gidnumber: 500 +objectclass: posixGroup +objectclass: top + +# 
Entry 5: cn=users,ou=groups,dc=company,dc=com +dn: cn=users,ou=groups,dc=company,dc=com +cn: users +gidnumber: 501 +objectclass: posixGroup +objectclass: top + +# Entry 6: ou=users,dc=company,dc=com +dn: ou=users,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: users + +# Entry 7: cn=user1,ou=users,dc=company,dc=com +dn: cn=user1,ou=users,dc=company,dc=com +cn: user1 +gidnumber: 501 +givenname: John +homedirectory: /home/users/user1 +objectclass: inetOrgPerson +objectclass: posixAccount +objectclass: top +sn: User +uid: user1 +uidnumber: 1101 +userpassword: user1 diff --git a/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ca.crt b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ca.crt new file mode 100644 index 00000000000..8c71e3afc91 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ca.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDlTCCAn2gAwIBAgIUJBqw2dHM2DDCZjYSkPOESlvDH6swDQYJKoZIhvcNAQEL +BQAwWjELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQHDAZPdHRhd2Ex +ETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTENMAsGA1UEAwwEcm9vdDAe +Fw0yMDA2MTExOTAzNDhaFw0zMDA2MDkxOTAzNDhaMFoxCzAJBgNVBAYTAkNBMQsw +CQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYDVQQKDAhBbHRpbml0eTEL +MAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC9Irr0zGV+HCI2fZ0ht4hR5It4Sbjz4RwZV8ENRP/+TEz8l9eK +J6ygxhKX7SMYzIs/jS9Gsq4plX1r2ujW1qRf8yLpR4+dGLP+jBRi1drj0XjZXosT +SERjWzgPauWxL9LN8+l26eBAqz6fw5e0W8WRSTgf5iGiCcKOTmaATIUjP0CdfWKK +qpktI4vhe++CXZFJ3usR+8KZ/FwwbCLJM/3J2HnbcXfcaYPYvr1tfqLudKSTbG9H +M3+AVwjctdesc/0sbd51Zsm0ClQptMbuKnDCYauGg61kNkgbgPgRmH9Pzo67DtxF +/WW+PtOzq8xLOifciQ9Piboy9QBSQZGwf4wzAgMBAAGjUzBRMB0GA1UdDgQWBBSi +njya0RDozx3OZTLYFpwqYnlpIDAfBgNVHSMEGDAWgBSinjya0RDozx3OZTLYFpwq +YnlpIDAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBAD7VyFg7F +U1C25KFvtauchAOjCW6w7U/b3z1dVZvcQ88/kH1VsLUcfGixlSilUEfPTJsi7OA0 +R5BQdh2GGcjUJv4iqEFGU05KvMVmRRKn08P62+ZhJxKMxG26VzcliRZzCMkI6d0W +lFwI6nM45yeqdHVh5k4xbuJzqpbD9BtXXLI+/Ra9Fx8S9ETA3GdidpZLU5P1VLxq +UuedfqyAVWZXpr6TAURGxouRmRzul9yFzbSUex+MLEIPrstjtEwV3+tBQZJz9xAS +TVPj+Nv3LO7GCq54bdwkq1ioWbSL2hEmABkj6kdW/JwmfhGHf/2rirDVMzrTYw07 +dFJfAZC+FEsv +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap2/certs/dhparam.pem b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/dhparam.pem new file mode 100644 index 00000000000..0a96faffd62 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/dhparam.pem @@ -0,0 +1,5 @@ +-----BEGIN DH PARAMETERS----- +MIGHAoGBAJitt2hhnpDViQ5ko2ipBMdjy+bZ6FR/WdZ987R7lQvBkKehPXmxtEyV +AO6ofv5CZSDJokc5bUeBOAtg0EhMTCH82uPdwQvt58jRXcxXBg4JTjkx+oW9LBv2 +FdZsbaX8+SYivmiZ0Jp8T/HBm/4DA9VBS0O5GFRS4C7dHhmSTPfDAgEC +-----END DH PARAMETERS----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.crt b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.crt new file mode 100644 index 00000000000..9167cbf861d --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDQDCCAigCFCJ7El0ntrGktZVTYTZd+OwtcJjBMA0GCSqGSIb3DQEBCwUAMFox +CzAJBgNVBAYTAkNBMQswCQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYD +VQQKDAhBbHRpbml0eTELMAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwHhcNMjAw +NjExMTkxMTQzWhcNMzAwNjA5MTkxMTQzWjBfMQswCQYDVQQGEwJDQTELMAkGA1UE +CAwCT04xDzANBgNVBAcMBk90dGF3YTERMA8GA1UECgwIQWx0aW5pdHkxCzAJBgNV +BAsMAlFBMRIwEAYDVQQDDAlvcGVubGRhcDIwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC0Mbn//U56URavMgXm82FWP6vBdKuRydFX/L0M5XLlnAtk/IXG 
+/T+4t7nOBJxWmTp/xpsPtSMALE4eFJpEUEqlpVbG5DfBzVWcYOWoMeRAcHWCDkzr +PkB6I0dfF0Mm5hoaDhn+ZXjBWvoh/IlJdAnPg5mlejflJBQ7xtFC9eN6WjldXuRO +vyntGNuMfVLgITHwXuH2yZ98G0mFO6TU/9dRY/Z3D6RTSzKdb17Yk/VnG+ry92u2 +0sgXIBvhuJuC3ksWLArwwFoMl8DVa05D4O2H76goGdCcQ0KzqBV8RPXAh3UcgP2e +Zu90p2EGIhIk+sZTCkPd4dorxjL9nkRR86HdAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggEBAJWiCxJaTksv/BTsh/etxlDY5eHwqStqIuiovEQ8bhGAcKJ3bfWd/YTb8DUS +hrLvXrXdOVC+U8PqPFXBpdOqcm5Dc233z52VgUCb+0EKv3lAzgKXRIo32h52skdK +NnRrCHDeDzgfEIXR4MEJ99cLEaxWyXQhremmTYWHYznry9/4NYz40gCDxHn9dJAi +KxFyDNxhtuKs58zp4PrBoo+542JurAoLPtRGOhdXpU2RkQVU/ho38HsAXDStAB5D +vAoSxPuMHKgo17ffrb0oqU3didwaA9fIsz7Mr6RxmI7X03s7hLzNBq9FCqu0U3RR +CX4zWGFNJu/ieSGVWLYKQzbYxp8= +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.csr b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.csr new file mode 100644 index 00000000000..bf569f727d6 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.csr @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIICpDCCAYwCAQAwXzELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQH +DAZPdHRhd2ExETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTESMBAGA1UE +AwwJb3BlbmxkYXAyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtDG5 +//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyFxv0/uLe5zgScVpk6f8ab +D7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M6z5AeiNHXxdDJuYaGg4Z +/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7kTr8p7RjbjH1S4CEx8F7h +9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdrttLIFyAb4bibgt5LFiwK +8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9nmbvdKdhBiISJPrGUwpD +3eHaK8Yy/Z5EUfOh3QIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBAEzIjZQOT5R7 +mEJg+RFpCSIoPn3xJ4/VMMyWqA3bTGZKpb4S6GxgsierY/87kPL7jZrMdGYB4Dc3 +2M3VWZGXlYo8vctH1zLE9VW6CzosUpl20lhdgydoCMz3RQqdJyK8aGeFTeLtk7G/ +TRCCUFUE6jaA+VtaCPCnOJSff3jUf76xguEu7dgTZgCKV7dtBqald8gIzF3D+AJJ +7pEN2UrC3UR0xpe2cj2GhndQJ+WsIyft3zpNFzAO13j8ZPibuVP7oDWcW3ixNCWC +213aeRVplJGof8Eo6llDxP+6Fwp1YmOoQmwB1Xm3t4ADn7FLJ14LONLB7q40KviG +RyLyqu3IVOI= +-----END CERTIFICATE REQUEST----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.key b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.key new file mode 100644 index 00000000000..5ab3a3f8b59 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap2/certs/ldap.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAtDG5//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyF +xv0/uLe5zgScVpk6f8abD7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M +6z5AeiNHXxdDJuYaGg4Z/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7k +Tr8p7RjbjH1S4CEx8F7h9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdr +ttLIFyAb4bibgt5LFiwK8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9 +nmbvdKdhBiISJPrGUwpD3eHaK8Yy/Z5EUfOh3QIDAQABAoIBADugMMIKWcuTxYPX +c6iGZHEbxIPRTWyCcalB0nTQAAMGbabPAJ1l8432DZ+kWu806OybFXhPIfPOtVKy +0pFEWE8TtPE/V0vj3C5Qye2sBLFmBRwyCzXUdZV00wseMXRPs9dnTyalAR5KMnbI +j80kfpKSI2dkV9aU57UYBuq3Xrx/TCGItwL769D4ZZW9BvbpiTZApQQFZ0gwUFFn +btPXGU9Ti8H4mfBuZWL+5CaZdqOo76+CXvMPaUK0F9MJp4yX3XxQLRNH3qz/Tyn7 +h7QOOo0XTqoUmzRw0N9QRVH5LRdSE5yq3aF9aFKjNW59exz+62pufOFadngzkpkn +OKCzgWkCgYEA4mOWWMzdYwMn3GtfG7whqlqy7wOmMkNb81zTDQejHBV98dnj0AHr +deurfKWzHrAh3DXo6tFeqUIgXabhBPS/0dEx/S5sgLFmuUZP05EUYahfWBgzzmM9 +C6Oe5xIMLzxsZCJczolsfkEsoFe4o0vkvuLYoQrQL7InzewcDy8cUxsCgYEAy8Na +YCnanSNDY03Bulcni+5sF+opaHseeki1pv3nlw8TwsWuZF9ApS+yL7ck9jJjxBRR +RC3KGmpoqIr0vTmUYS946ngQWXPE90zfuhJfM+NRv/q0oCjH0qAcxRbTkls5On9v +oxJ8rO7gD6K85eHqasWdbCVzdZrobOXzay37tmcCgYBfyUUmw190cjReZauzH3Gb +E48b5A5gu/Fe0cqWe8G+szU7rDZgnz9SAGnpbm6QMHPTKZgoKngD42+wUFhq8Wdr 
+zjh5aDgOZ4EQKTjDSmI2Q7g7nNnmnESK9SrZl+BB6C3wXD2qQaj+7nKEUTlVFlpt +jaucz+dwFtASp7Djl8pDOwKBgEtr2c3ycArt/ImLRIP2spqm+7e2YvFbcSKOOz6+ +iLRvTj8v8KcSYtlB2FC1F6dRa4AujQ4RbNduP6LzHDfWUkfOzJDtNBAIPAXVnJJB +LqAEKkRHRghqT9x0i3GgS1vHDF3MwcO4mhFgserXr9ffUWeIEgbvrdcAKbv1Oa6Y +bK1NAoGAGPm8ISmboDJynjBl9wMrkcy23Pwg9kmyocdWUHh0zMLDKriZNKYB6u/U +C+/RTfkohPoHPzkeqWiHp7z3JhMItYUfTkNW6vMCxEGc0NEN6ZyMIjtiDPGN1n6O +E7jmODFmj1AQICQGdV5SHp+yKvKyb0YHKyDwETbs4SZBXxVvjEw= +-----END RSA PRIVATE KEY----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap2/config/export.ldif b/tests/testflows/ldap/role_mapping/configs/ldap2/config/export.ldif new file mode 100644 index 00000000000..6766aaae6f1 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap2/config/export.ldif @@ -0,0 +1,64 @@ +# LDIF Export for dc=company,dc=com +# Server: openldap (openldap) +# Search Scope: sub +# Search Filter: (objectClass=*) +# Total Entries: 7 +# +# Generated by phpLDAPadmin (http://phpldapadmin.sourceforge.net) on May 22, 2020 5:51 pm +# Version: 1.2.5 + +# Entry 1: dc=company,dc=com +#dn: dc=company,dc=com +#dc: company +#o: company +#objectclass: top +#objectclass: dcObject +#objectclass: organization + +# Entry 2: cn=admin,dc=company,dc=com +#dn: cn=admin,dc=company,dc=com +#cn: admin +#description: LDAP administrator +#objectclass: simpleSecurityObject +#objectclass: organizationalRole +#userpassword: {SSHA}eUEupkQCTvq9SkrxfWGSe5rX+orrjVbF + +# Entry 3: ou=groups,dc=company,dc=com +dn: ou=groups,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: groups + +# Entry 4: cn=admin,ou=groups,dc=company,dc=com +dn: cn=admin,ou=groups,dc=company,dc=com +cn: admin +gidnumber: 500 +objectclass: posixGroup +objectclass: top + +# Entry 5: cn=users,ou=groups,dc=company,dc=com +dn: cn=users,ou=groups,dc=company,dc=com +cn: users +gidnumber: 501 +objectclass: posixGroup +objectclass: top + +# Entry 6: ou=users,dc=company,dc=com +dn: ou=users,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: users + +# Entry 7: cn=user2,ou=users,dc=company,dc=com +dn: cn=user2,ou=users,dc=company,dc=com +cn: user2 +gidnumber: 501 +givenname: John +homedirectory: /home/users/user2 +objectclass: inetOrgPerson +objectclass: posixAccount +objectclass: top +sn: User +uid: user2 +uidnumber: 1002 +userpassword: user2 diff --git a/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ca.crt b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ca.crt new file mode 100644 index 00000000000..8c71e3afc91 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ca.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDlTCCAn2gAwIBAgIUJBqw2dHM2DDCZjYSkPOESlvDH6swDQYJKoZIhvcNAQEL +BQAwWjELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQHDAZPdHRhd2Ex +ETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTENMAsGA1UEAwwEcm9vdDAe +Fw0yMDA2MTExOTAzNDhaFw0zMDA2MDkxOTAzNDhaMFoxCzAJBgNVBAYTAkNBMQsw +CQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYDVQQKDAhBbHRpbml0eTEL +MAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC9Irr0zGV+HCI2fZ0ht4hR5It4Sbjz4RwZV8ENRP/+TEz8l9eK +J6ygxhKX7SMYzIs/jS9Gsq4plX1r2ujW1qRf8yLpR4+dGLP+jBRi1drj0XjZXosT +SERjWzgPauWxL9LN8+l26eBAqz6fw5e0W8WRSTgf5iGiCcKOTmaATIUjP0CdfWKK +qpktI4vhe++CXZFJ3usR+8KZ/FwwbCLJM/3J2HnbcXfcaYPYvr1tfqLudKSTbG9H +M3+AVwjctdesc/0sbd51Zsm0ClQptMbuKnDCYauGg61kNkgbgPgRmH9Pzo67DtxF +/WW+PtOzq8xLOifciQ9Piboy9QBSQZGwf4wzAgMBAAGjUzBRMB0GA1UdDgQWBBSi +njya0RDozx3OZTLYFpwqYnlpIDAfBgNVHSMEGDAWgBSinjya0RDozx3OZTLYFpwq 
+YnlpIDAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBAD7VyFg7F +U1C25KFvtauchAOjCW6w7U/b3z1dVZvcQ88/kH1VsLUcfGixlSilUEfPTJsi7OA0 +R5BQdh2GGcjUJv4iqEFGU05KvMVmRRKn08P62+ZhJxKMxG26VzcliRZzCMkI6d0W +lFwI6nM45yeqdHVh5k4xbuJzqpbD9BtXXLI+/Ra9Fx8S9ETA3GdidpZLU5P1VLxq +UuedfqyAVWZXpr6TAURGxouRmRzul9yFzbSUex+MLEIPrstjtEwV3+tBQZJz9xAS +TVPj+Nv3LO7GCq54bdwkq1ioWbSL2hEmABkj6kdW/JwmfhGHf/2rirDVMzrTYw07 +dFJfAZC+FEsv +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap3/certs/dhparam.pem b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/dhparam.pem new file mode 100644 index 00000000000..0a96faffd62 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/dhparam.pem @@ -0,0 +1,5 @@ +-----BEGIN DH PARAMETERS----- +MIGHAoGBAJitt2hhnpDViQ5ko2ipBMdjy+bZ6FR/WdZ987R7lQvBkKehPXmxtEyV +AO6ofv5CZSDJokc5bUeBOAtg0EhMTCH82uPdwQvt58jRXcxXBg4JTjkx+oW9LBv2 +FdZsbaX8+SYivmiZ0Jp8T/HBm/4DA9VBS0O5GFRS4C7dHhmSTPfDAgEC +-----END DH PARAMETERS----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.crt b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.crt new file mode 100644 index 00000000000..9167cbf861d --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDQDCCAigCFCJ7El0ntrGktZVTYTZd+OwtcJjBMA0GCSqGSIb3DQEBCwUAMFox +CzAJBgNVBAYTAkNBMQswCQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYD +VQQKDAhBbHRpbml0eTELMAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwHhcNMjAw +NjExMTkxMTQzWhcNMzAwNjA5MTkxMTQzWjBfMQswCQYDVQQGEwJDQTELMAkGA1UE +CAwCT04xDzANBgNVBAcMBk90dGF3YTERMA8GA1UECgwIQWx0aW5pdHkxCzAJBgNV +BAsMAlFBMRIwEAYDVQQDDAlvcGVubGRhcDIwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC0Mbn//U56URavMgXm82FWP6vBdKuRydFX/L0M5XLlnAtk/IXG +/T+4t7nOBJxWmTp/xpsPtSMALE4eFJpEUEqlpVbG5DfBzVWcYOWoMeRAcHWCDkzr +PkB6I0dfF0Mm5hoaDhn+ZXjBWvoh/IlJdAnPg5mlejflJBQ7xtFC9eN6WjldXuRO +vyntGNuMfVLgITHwXuH2yZ98G0mFO6TU/9dRY/Z3D6RTSzKdb17Yk/VnG+ry92u2 +0sgXIBvhuJuC3ksWLArwwFoMl8DVa05D4O2H76goGdCcQ0KzqBV8RPXAh3UcgP2e +Zu90p2EGIhIk+sZTCkPd4dorxjL9nkRR86HdAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggEBAJWiCxJaTksv/BTsh/etxlDY5eHwqStqIuiovEQ8bhGAcKJ3bfWd/YTb8DUS +hrLvXrXdOVC+U8PqPFXBpdOqcm5Dc233z52VgUCb+0EKv3lAzgKXRIo32h52skdK +NnRrCHDeDzgfEIXR4MEJ99cLEaxWyXQhremmTYWHYznry9/4NYz40gCDxHn9dJAi +KxFyDNxhtuKs58zp4PrBoo+542JurAoLPtRGOhdXpU2RkQVU/ho38HsAXDStAB5D +vAoSxPuMHKgo17ffrb0oqU3didwaA9fIsz7Mr6RxmI7X03s7hLzNBq9FCqu0U3RR +CX4zWGFNJu/ieSGVWLYKQzbYxp8= +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.csr b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.csr new file mode 100644 index 00000000000..bf569f727d6 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.csr @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIICpDCCAYwCAQAwXzELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQH +DAZPdHRhd2ExETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTESMBAGA1UE +AwwJb3BlbmxkYXAyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtDG5 +//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyFxv0/uLe5zgScVpk6f8ab +D7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M6z5AeiNHXxdDJuYaGg4Z +/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7kTr8p7RjbjH1S4CEx8F7h +9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdrttLIFyAb4bibgt5LFiwK +8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9nmbvdKdhBiISJPrGUwpD +3eHaK8Yy/Z5EUfOh3QIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBAEzIjZQOT5R7 +mEJg+RFpCSIoPn3xJ4/VMMyWqA3bTGZKpb4S6GxgsierY/87kPL7jZrMdGYB4Dc3 +2M3VWZGXlYo8vctH1zLE9VW6CzosUpl20lhdgydoCMz3RQqdJyK8aGeFTeLtk7G/ 
+TRCCUFUE6jaA+VtaCPCnOJSff3jUf76xguEu7dgTZgCKV7dtBqald8gIzF3D+AJJ +7pEN2UrC3UR0xpe2cj2GhndQJ+WsIyft3zpNFzAO13j8ZPibuVP7oDWcW3ixNCWC +213aeRVplJGof8Eo6llDxP+6Fwp1YmOoQmwB1Xm3t4ADn7FLJ14LONLB7q40KviG +RyLyqu3IVOI= +-----END CERTIFICATE REQUEST----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.key b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.key new file mode 100644 index 00000000000..5ab3a3f8b59 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap3/certs/ldap.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAtDG5//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyF +xv0/uLe5zgScVpk6f8abD7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M +6z5AeiNHXxdDJuYaGg4Z/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7k +Tr8p7RjbjH1S4CEx8F7h9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdr +ttLIFyAb4bibgt5LFiwK8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9 +nmbvdKdhBiISJPrGUwpD3eHaK8Yy/Z5EUfOh3QIDAQABAoIBADugMMIKWcuTxYPX +c6iGZHEbxIPRTWyCcalB0nTQAAMGbabPAJ1l8432DZ+kWu806OybFXhPIfPOtVKy +0pFEWE8TtPE/V0vj3C5Qye2sBLFmBRwyCzXUdZV00wseMXRPs9dnTyalAR5KMnbI +j80kfpKSI2dkV9aU57UYBuq3Xrx/TCGItwL769D4ZZW9BvbpiTZApQQFZ0gwUFFn +btPXGU9Ti8H4mfBuZWL+5CaZdqOo76+CXvMPaUK0F9MJp4yX3XxQLRNH3qz/Tyn7 +h7QOOo0XTqoUmzRw0N9QRVH5LRdSE5yq3aF9aFKjNW59exz+62pufOFadngzkpkn +OKCzgWkCgYEA4mOWWMzdYwMn3GtfG7whqlqy7wOmMkNb81zTDQejHBV98dnj0AHr +deurfKWzHrAh3DXo6tFeqUIgXabhBPS/0dEx/S5sgLFmuUZP05EUYahfWBgzzmM9 +C6Oe5xIMLzxsZCJczolsfkEsoFe4o0vkvuLYoQrQL7InzewcDy8cUxsCgYEAy8Na +YCnanSNDY03Bulcni+5sF+opaHseeki1pv3nlw8TwsWuZF9ApS+yL7ck9jJjxBRR +RC3KGmpoqIr0vTmUYS946ngQWXPE90zfuhJfM+NRv/q0oCjH0qAcxRbTkls5On9v +oxJ8rO7gD6K85eHqasWdbCVzdZrobOXzay37tmcCgYBfyUUmw190cjReZauzH3Gb +E48b5A5gu/Fe0cqWe8G+szU7rDZgnz9SAGnpbm6QMHPTKZgoKngD42+wUFhq8Wdr +zjh5aDgOZ4EQKTjDSmI2Q7g7nNnmnESK9SrZl+BB6C3wXD2qQaj+7nKEUTlVFlpt +jaucz+dwFtASp7Djl8pDOwKBgEtr2c3ycArt/ImLRIP2spqm+7e2YvFbcSKOOz6+ +iLRvTj8v8KcSYtlB2FC1F6dRa4AujQ4RbNduP6LzHDfWUkfOzJDtNBAIPAXVnJJB +LqAEKkRHRghqT9x0i3GgS1vHDF3MwcO4mhFgserXr9ffUWeIEgbvrdcAKbv1Oa6Y +bK1NAoGAGPm8ISmboDJynjBl9wMrkcy23Pwg9kmyocdWUHh0zMLDKriZNKYB6u/U +C+/RTfkohPoHPzkeqWiHp7z3JhMItYUfTkNW6vMCxEGc0NEN6ZyMIjtiDPGN1n6O +E7jmODFmj1AQICQGdV5SHp+yKvKyb0YHKyDwETbs4SZBXxVvjEw= +-----END RSA PRIVATE KEY----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap3/config/export.ldif b/tests/testflows/ldap/role_mapping/configs/ldap3/config/export.ldif new file mode 100644 index 00000000000..6ac9a995efd --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap3/config/export.ldif @@ -0,0 +1,64 @@ +# LDIF Export for dc=company,dc=com +# Server: openldap (openldap) +# Search Scope: sub +# Search Filter: (objectClass=*) +# Total Entries: 7 +# +# Generated by phpLDAPadmin (http://phpldapadmin.sourceforge.net) on May 22, 2020 5:51 pm +# Version: 1.2.5 + +# Entry 1: dc=company,dc=com +#dn: dc=company,dc=com +#dc: company +#o: company +#objectclass: top +#objectclass: dcObject +#objectclass: organization + +# Entry 2: cn=admin,dc=company,dc=com +#dn: cn=admin,dc=company,dc=com +#cn: admin +#description: LDAP administrator +#objectclass: simpleSecurityObject +#objectclass: organizationalRole +#userpassword: {SSHA}eUEupkQCTvq9SkrxfWGSe5rX+orrjVbF + +# Entry 3: ou=groups,dc=company,dc=com +dn: ou=groups,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: groups + +# Entry 4: cn=admin,ou=groups,dc=company,dc=com +dn: cn=admin,ou=groups,dc=company,dc=com +cn: admin +gidnumber: 500 +objectclass: posixGroup +objectclass: top + +# Entry 5: cn=users,ou=groups,dc=company,dc=com 
+dn: cn=users,ou=groups,dc=company,dc=com +cn: users +gidnumber: 501 +objectclass: posixGroup +objectclass: top + +# Entry 6: ou=users,dc=company,dc=com +dn: ou=users,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: users + +# Entry 7: cn=user3,ou=users,dc=company,dc=com +dn: cn=user3,ou=users,dc=company,dc=com +cn: user3 +gidnumber: 501 +givenname: John +homedirectory: /home/users/user3 +objectclass: inetOrgPerson +objectclass: posixAccount +objectclass: top +sn: User +uid: user3 +uidnumber: 1003 +userpassword: user3 diff --git a/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ca.crt b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ca.crt new file mode 100644 index 00000000000..8c71e3afc91 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ca.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDlTCCAn2gAwIBAgIUJBqw2dHM2DDCZjYSkPOESlvDH6swDQYJKoZIhvcNAQEL +BQAwWjELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQHDAZPdHRhd2Ex +ETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTENMAsGA1UEAwwEcm9vdDAe +Fw0yMDA2MTExOTAzNDhaFw0zMDA2MDkxOTAzNDhaMFoxCzAJBgNVBAYTAkNBMQsw +CQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYDVQQKDAhBbHRpbml0eTEL +MAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC9Irr0zGV+HCI2fZ0ht4hR5It4Sbjz4RwZV8ENRP/+TEz8l9eK +J6ygxhKX7SMYzIs/jS9Gsq4plX1r2ujW1qRf8yLpR4+dGLP+jBRi1drj0XjZXosT +SERjWzgPauWxL9LN8+l26eBAqz6fw5e0W8WRSTgf5iGiCcKOTmaATIUjP0CdfWKK +qpktI4vhe++CXZFJ3usR+8KZ/FwwbCLJM/3J2HnbcXfcaYPYvr1tfqLudKSTbG9H +M3+AVwjctdesc/0sbd51Zsm0ClQptMbuKnDCYauGg61kNkgbgPgRmH9Pzo67DtxF +/WW+PtOzq8xLOifciQ9Piboy9QBSQZGwf4wzAgMBAAGjUzBRMB0GA1UdDgQWBBSi +njya0RDozx3OZTLYFpwqYnlpIDAfBgNVHSMEGDAWgBSinjya0RDozx3OZTLYFpwq +YnlpIDAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBAD7VyFg7F +U1C25KFvtauchAOjCW6w7U/b3z1dVZvcQ88/kH1VsLUcfGixlSilUEfPTJsi7OA0 +R5BQdh2GGcjUJv4iqEFGU05KvMVmRRKn08P62+ZhJxKMxG26VzcliRZzCMkI6d0W +lFwI6nM45yeqdHVh5k4xbuJzqpbD9BtXXLI+/Ra9Fx8S9ETA3GdidpZLU5P1VLxq +UuedfqyAVWZXpr6TAURGxouRmRzul9yFzbSUex+MLEIPrstjtEwV3+tBQZJz9xAS +TVPj+Nv3LO7GCq54bdwkq1ioWbSL2hEmABkj6kdW/JwmfhGHf/2rirDVMzrTYw07 +dFJfAZC+FEsv +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap4/certs/dhparam.pem b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/dhparam.pem new file mode 100644 index 00000000000..0a96faffd62 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/dhparam.pem @@ -0,0 +1,5 @@ +-----BEGIN DH PARAMETERS----- +MIGHAoGBAJitt2hhnpDViQ5ko2ipBMdjy+bZ6FR/WdZ987R7lQvBkKehPXmxtEyV +AO6ofv5CZSDJokc5bUeBOAtg0EhMTCH82uPdwQvt58jRXcxXBg4JTjkx+oW9LBv2 +FdZsbaX8+SYivmiZ0Jp8T/HBm/4DA9VBS0O5GFRS4C7dHhmSTPfDAgEC +-----END DH PARAMETERS----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.crt b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.crt new file mode 100644 index 00000000000..9167cbf861d --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDQDCCAigCFCJ7El0ntrGktZVTYTZd+OwtcJjBMA0GCSqGSIb3DQEBCwUAMFox +CzAJBgNVBAYTAkNBMQswCQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYD +VQQKDAhBbHRpbml0eTELMAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwHhcNMjAw +NjExMTkxMTQzWhcNMzAwNjA5MTkxMTQzWjBfMQswCQYDVQQGEwJDQTELMAkGA1UE +CAwCT04xDzANBgNVBAcMBk90dGF3YTERMA8GA1UECgwIQWx0aW5pdHkxCzAJBgNV +BAsMAlFBMRIwEAYDVQQDDAlvcGVubGRhcDIwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC0Mbn//U56URavMgXm82FWP6vBdKuRydFX/L0M5XLlnAtk/IXG 
+/T+4t7nOBJxWmTp/xpsPtSMALE4eFJpEUEqlpVbG5DfBzVWcYOWoMeRAcHWCDkzr +PkB6I0dfF0Mm5hoaDhn+ZXjBWvoh/IlJdAnPg5mlejflJBQ7xtFC9eN6WjldXuRO +vyntGNuMfVLgITHwXuH2yZ98G0mFO6TU/9dRY/Z3D6RTSzKdb17Yk/VnG+ry92u2 +0sgXIBvhuJuC3ksWLArwwFoMl8DVa05D4O2H76goGdCcQ0KzqBV8RPXAh3UcgP2e +Zu90p2EGIhIk+sZTCkPd4dorxjL9nkRR86HdAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggEBAJWiCxJaTksv/BTsh/etxlDY5eHwqStqIuiovEQ8bhGAcKJ3bfWd/YTb8DUS +hrLvXrXdOVC+U8PqPFXBpdOqcm5Dc233z52VgUCb+0EKv3lAzgKXRIo32h52skdK +NnRrCHDeDzgfEIXR4MEJ99cLEaxWyXQhremmTYWHYznry9/4NYz40gCDxHn9dJAi +KxFyDNxhtuKs58zp4PrBoo+542JurAoLPtRGOhdXpU2RkQVU/ho38HsAXDStAB5D +vAoSxPuMHKgo17ffrb0oqU3didwaA9fIsz7Mr6RxmI7X03s7hLzNBq9FCqu0U3RR +CX4zWGFNJu/ieSGVWLYKQzbYxp8= +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.csr b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.csr new file mode 100644 index 00000000000..bf569f727d6 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.csr @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIICpDCCAYwCAQAwXzELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQH +DAZPdHRhd2ExETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTESMBAGA1UE +AwwJb3BlbmxkYXAyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtDG5 +//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyFxv0/uLe5zgScVpk6f8ab +D7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M6z5AeiNHXxdDJuYaGg4Z +/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7kTr8p7RjbjH1S4CEx8F7h +9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdrttLIFyAb4bibgt5LFiwK +8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9nmbvdKdhBiISJPrGUwpD +3eHaK8Yy/Z5EUfOh3QIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBAEzIjZQOT5R7 +mEJg+RFpCSIoPn3xJ4/VMMyWqA3bTGZKpb4S6GxgsierY/87kPL7jZrMdGYB4Dc3 +2M3VWZGXlYo8vctH1zLE9VW6CzosUpl20lhdgydoCMz3RQqdJyK8aGeFTeLtk7G/ +TRCCUFUE6jaA+VtaCPCnOJSff3jUf76xguEu7dgTZgCKV7dtBqald8gIzF3D+AJJ +7pEN2UrC3UR0xpe2cj2GhndQJ+WsIyft3zpNFzAO13j8ZPibuVP7oDWcW3ixNCWC +213aeRVplJGof8Eo6llDxP+6Fwp1YmOoQmwB1Xm3t4ADn7FLJ14LONLB7q40KviG +RyLyqu3IVOI= +-----END CERTIFICATE REQUEST----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.key b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.key new file mode 100644 index 00000000000..5ab3a3f8b59 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap4/certs/ldap.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAtDG5//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyF +xv0/uLe5zgScVpk6f8abD7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M +6z5AeiNHXxdDJuYaGg4Z/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7k +Tr8p7RjbjH1S4CEx8F7h9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdr +ttLIFyAb4bibgt5LFiwK8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9 +nmbvdKdhBiISJPrGUwpD3eHaK8Yy/Z5EUfOh3QIDAQABAoIBADugMMIKWcuTxYPX +c6iGZHEbxIPRTWyCcalB0nTQAAMGbabPAJ1l8432DZ+kWu806OybFXhPIfPOtVKy +0pFEWE8TtPE/V0vj3C5Qye2sBLFmBRwyCzXUdZV00wseMXRPs9dnTyalAR5KMnbI +j80kfpKSI2dkV9aU57UYBuq3Xrx/TCGItwL769D4ZZW9BvbpiTZApQQFZ0gwUFFn +btPXGU9Ti8H4mfBuZWL+5CaZdqOo76+CXvMPaUK0F9MJp4yX3XxQLRNH3qz/Tyn7 +h7QOOo0XTqoUmzRw0N9QRVH5LRdSE5yq3aF9aFKjNW59exz+62pufOFadngzkpkn +OKCzgWkCgYEA4mOWWMzdYwMn3GtfG7whqlqy7wOmMkNb81zTDQejHBV98dnj0AHr +deurfKWzHrAh3DXo6tFeqUIgXabhBPS/0dEx/S5sgLFmuUZP05EUYahfWBgzzmM9 +C6Oe5xIMLzxsZCJczolsfkEsoFe4o0vkvuLYoQrQL7InzewcDy8cUxsCgYEAy8Na +YCnanSNDY03Bulcni+5sF+opaHseeki1pv3nlw8TwsWuZF9ApS+yL7ck9jJjxBRR +RC3KGmpoqIr0vTmUYS946ngQWXPE90zfuhJfM+NRv/q0oCjH0qAcxRbTkls5On9v +oxJ8rO7gD6K85eHqasWdbCVzdZrobOXzay37tmcCgYBfyUUmw190cjReZauzH3Gb +E48b5A5gu/Fe0cqWe8G+szU7rDZgnz9SAGnpbm6QMHPTKZgoKngD42+wUFhq8Wdr 
+zjh5aDgOZ4EQKTjDSmI2Q7g7nNnmnESK9SrZl+BB6C3wXD2qQaj+7nKEUTlVFlpt +jaucz+dwFtASp7Djl8pDOwKBgEtr2c3ycArt/ImLRIP2spqm+7e2YvFbcSKOOz6+ +iLRvTj8v8KcSYtlB2FC1F6dRa4AujQ4RbNduP6LzHDfWUkfOzJDtNBAIPAXVnJJB +LqAEKkRHRghqT9x0i3GgS1vHDF3MwcO4mhFgserXr9ffUWeIEgbvrdcAKbv1Oa6Y +bK1NAoGAGPm8ISmboDJynjBl9wMrkcy23Pwg9kmyocdWUHh0zMLDKriZNKYB6u/U +C+/RTfkohPoHPzkeqWiHp7z3JhMItYUfTkNW6vMCxEGc0NEN6ZyMIjtiDPGN1n6O +E7jmODFmj1AQICQGdV5SHp+yKvKyb0YHKyDwETbs4SZBXxVvjEw= +-----END RSA PRIVATE KEY----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap4/config/export.ldif b/tests/testflows/ldap/role_mapping/configs/ldap4/config/export.ldif new file mode 100644 index 00000000000..36afdb4e350 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap4/config/export.ldif @@ -0,0 +1,64 @@ +# LDIF Export for dc=company,dc=com +# Server: openldap (openldap) +# Search Scope: sub +# Search Filter: (objectClass=*) +# Total Entries: 7 +# +# Generated by phpLDAPadmin (http://phpldapadmin.sourceforge.net) on May 22, 2020 5:51 pm +# Version: 1.2.5 + +# Entry 1: dc=company,dc=com +#dn: dc=company,dc=com +#dc: company +#o: company +#objectclass: top +#objectclass: dcObject +#objectclass: organization + +# Entry 2: cn=admin,dc=company,dc=com +#dn: cn=admin,dc=company,dc=com +#cn: admin +#description: LDAP administrator +#objectclass: simpleSecurityObject +#objectclass: organizationalRole +#userpassword: {SSHA}eUEupkQCTvq9SkrxfWGSe5rX+orrjVbF + +# Entry 3: ou=groups,dc=company,dc=com +dn: ou=groups,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: groups + +# Entry 4: cn=admin,ou=groups,dc=company,dc=com +dn: cn=admin,ou=groups,dc=company,dc=com +cn: admin +gidnumber: 500 +objectclass: posixGroup +objectclass: top + +# Entry 5: cn=users,ou=groups,dc=company,dc=com +dn: cn=users,ou=groups,dc=company,dc=com +cn: users +gidnumber: 501 +objectclass: posixGroup +objectclass: top + +# Entry 6: ou=users,dc=company,dc=com +dn: ou=users,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: users + +# Entry 7: cn=user4,ou=users,dc=company,dc=com +dn: cn=user4,ou=users,dc=company,dc=com +cn: user4 +gidnumber: 501 +givenname: John +homedirectory: /home/users/user4 +objectclass: inetOrgPerson +objectclass: posixAccount +objectclass: top +sn: User +uid: user4 +uidnumber: 1004 +userpassword: user4 diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/config/export.ldif b/tests/testflows/ldap/role_mapping/configs/ldap5/config/export.ldif new file mode 100644 index 00000000000..bc3d2ff75fc --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/config/export.ldif @@ -0,0 +1,64 @@ +# LDIF Export for dc=company,dc=com +# Server: openldap (openldap) +# Search Scope: sub +# Search Filter: (objectClass=*) +# Total Entries: 7 +# +# Generated by phpLDAPadmin (http://phpldapadmin.sourceforge.net) on May 22, 2020 5:51 pm +# Version: 1.2.5 + +# Entry 1: dc=company,dc=com +#dn: dc=company,dc=com +#dc: company +#o: company +#objectclass: top +#objectclass: dcObject +#objectclass: organization + +# Entry 2: cn=admin,dc=company,dc=com +#dn: cn=admin,dc=company,dc=com +#cn: admin +#description: LDAP administrator +#objectclass: simpleSecurityObject +#objectclass: organizationalRole +#userpassword: {SSHA}eUEupkQCTvq9SkrxfWGSe5rX+orrjVbF + +# Entry 3: ou=groups,dc=company,dc=com +dn: ou=groups,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: groups + +# Entry 4: cn=admin,ou=groups,dc=company,dc=com +dn: cn=admin,ou=groups,dc=company,dc=com +cn: admin +gidnumber: 
500 +objectclass: posixGroup +objectclass: top + +# Entry 5: cn=users,ou=groups,dc=company,dc=com +dn: cn=users,ou=groups,dc=company,dc=com +cn: users +gidnumber: 501 +objectclass: posixGroup +objectclass: top + +# Entry 6: ou=users,dc=company,dc=com +dn: ou=users,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: users + +# Entry 7: cn=user5,ou=users,dc=company,dc=com +dn: cn=user5,ou=users,dc=company,dc=com +cn: user5 +gidnumber: 501 +givenname: John +homedirectory: /home/users/user5 +objectclass: inetOrgPerson +objectclass: posixAccount +objectclass: top +sn: User +uid: user5 +uidnumber: 1005 +userpassword: user5 diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ca.crt b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ca.crt new file mode 100644 index 00000000000..8c71e3afc91 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ca.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDlTCCAn2gAwIBAgIUJBqw2dHM2DDCZjYSkPOESlvDH6swDQYJKoZIhvcNAQEL +BQAwWjELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQHDAZPdHRhd2Ex +ETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTENMAsGA1UEAwwEcm9vdDAe +Fw0yMDA2MTExOTAzNDhaFw0zMDA2MDkxOTAzNDhaMFoxCzAJBgNVBAYTAkNBMQsw +CQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYDVQQKDAhBbHRpbml0eTEL +MAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC9Irr0zGV+HCI2fZ0ht4hR5It4Sbjz4RwZV8ENRP/+TEz8l9eK +J6ygxhKX7SMYzIs/jS9Gsq4plX1r2ujW1qRf8yLpR4+dGLP+jBRi1drj0XjZXosT +SERjWzgPauWxL9LN8+l26eBAqz6fw5e0W8WRSTgf5iGiCcKOTmaATIUjP0CdfWKK +qpktI4vhe++CXZFJ3usR+8KZ/FwwbCLJM/3J2HnbcXfcaYPYvr1tfqLudKSTbG9H +M3+AVwjctdesc/0sbd51Zsm0ClQptMbuKnDCYauGg61kNkgbgPgRmH9Pzo67DtxF +/WW+PtOzq8xLOifciQ9Piboy9QBSQZGwf4wzAgMBAAGjUzBRMB0GA1UdDgQWBBSi +njya0RDozx3OZTLYFpwqYnlpIDAfBgNVHSMEGDAWgBSinjya0RDozx3OZTLYFpwq +YnlpIDAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBAD7VyFg7F +U1C25KFvtauchAOjCW6w7U/b3z1dVZvcQ88/kH1VsLUcfGixlSilUEfPTJsi7OA0 +R5BQdh2GGcjUJv4iqEFGU05KvMVmRRKn08P62+ZhJxKMxG26VzcliRZzCMkI6d0W +lFwI6nM45yeqdHVh5k4xbuJzqpbD9BtXXLI+/Ra9Fx8S9ETA3GdidpZLU5P1VLxq +UuedfqyAVWZXpr6TAURGxouRmRzul9yFzbSUex+MLEIPrstjtEwV3+tBQZJz9xAS +TVPj+Nv3LO7GCq54bdwkq1ioWbSL2hEmABkj6kdW/JwmfhGHf/2rirDVMzrTYw07 +dFJfAZC+FEsv +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/dhparam.pem b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/dhparam.pem new file mode 100644 index 00000000000..0a96faffd62 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/dhparam.pem @@ -0,0 +1,5 @@ +-----BEGIN DH PARAMETERS----- +MIGHAoGBAJitt2hhnpDViQ5ko2ipBMdjy+bZ6FR/WdZ987R7lQvBkKehPXmxtEyV +AO6ofv5CZSDJokc5bUeBOAtg0EhMTCH82uPdwQvt58jRXcxXBg4JTjkx+oW9LBv2 +FdZsbaX8+SYivmiZ0Jp8T/HBm/4DA9VBS0O5GFRS4C7dHhmSTPfDAgEC +-----END DH PARAMETERS----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.crt b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.crt new file mode 100644 index 00000000000..9167cbf861d --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDQDCCAigCFCJ7El0ntrGktZVTYTZd+OwtcJjBMA0GCSqGSIb3DQEBCwUAMFox +CzAJBgNVBAYTAkNBMQswCQYDVQQIDAJPTjEPMA0GA1UEBwwGT3R0YXdhMREwDwYD +VQQKDAhBbHRpbml0eTELMAkGA1UECwwCUUExDTALBgNVBAMMBHJvb3QwHhcNMjAw +NjExMTkxMTQzWhcNMzAwNjA5MTkxMTQzWjBfMQswCQYDVQQGEwJDQTELMAkGA1UE +CAwCT04xDzANBgNVBAcMBk90dGF3YTERMA8GA1UECgwIQWx0aW5pdHkxCzAJBgNV 
+BAsMAlFBMRIwEAYDVQQDDAlvcGVubGRhcDIwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC0Mbn//U56URavMgXm82FWP6vBdKuRydFX/L0M5XLlnAtk/IXG +/T+4t7nOBJxWmTp/xpsPtSMALE4eFJpEUEqlpVbG5DfBzVWcYOWoMeRAcHWCDkzr +PkB6I0dfF0Mm5hoaDhn+ZXjBWvoh/IlJdAnPg5mlejflJBQ7xtFC9eN6WjldXuRO +vyntGNuMfVLgITHwXuH2yZ98G0mFO6TU/9dRY/Z3D6RTSzKdb17Yk/VnG+ry92u2 +0sgXIBvhuJuC3ksWLArwwFoMl8DVa05D4O2H76goGdCcQ0KzqBV8RPXAh3UcgP2e +Zu90p2EGIhIk+sZTCkPd4dorxjL9nkRR86HdAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggEBAJWiCxJaTksv/BTsh/etxlDY5eHwqStqIuiovEQ8bhGAcKJ3bfWd/YTb8DUS +hrLvXrXdOVC+U8PqPFXBpdOqcm5Dc233z52VgUCb+0EKv3lAzgKXRIo32h52skdK +NnRrCHDeDzgfEIXR4MEJ99cLEaxWyXQhremmTYWHYznry9/4NYz40gCDxHn9dJAi +KxFyDNxhtuKs58zp4PrBoo+542JurAoLPtRGOhdXpU2RkQVU/ho38HsAXDStAB5D +vAoSxPuMHKgo17ffrb0oqU3didwaA9fIsz7Mr6RxmI7X03s7hLzNBq9FCqu0U3RR +CX4zWGFNJu/ieSGVWLYKQzbYxp8= +-----END CERTIFICATE----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.csr b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.csr new file mode 100644 index 00000000000..bf569f727d6 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.csr @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIICpDCCAYwCAQAwXzELMAkGA1UEBhMCQ0ExCzAJBgNVBAgMAk9OMQ8wDQYDVQQH +DAZPdHRhd2ExETAPBgNVBAoMCEFsdGluaXR5MQswCQYDVQQLDAJRQTESMBAGA1UE +AwwJb3BlbmxkYXAyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtDG5 +//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyFxv0/uLe5zgScVpk6f8ab +D7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M6z5AeiNHXxdDJuYaGg4Z +/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7kTr8p7RjbjH1S4CEx8F7h +9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdrttLIFyAb4bibgt5LFiwK +8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9nmbvdKdhBiISJPrGUwpD +3eHaK8Yy/Z5EUfOh3QIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBAEzIjZQOT5R7 +mEJg+RFpCSIoPn3xJ4/VMMyWqA3bTGZKpb4S6GxgsierY/87kPL7jZrMdGYB4Dc3 +2M3VWZGXlYo8vctH1zLE9VW6CzosUpl20lhdgydoCMz3RQqdJyK8aGeFTeLtk7G/ +TRCCUFUE6jaA+VtaCPCnOJSff3jUf76xguEu7dgTZgCKV7dtBqald8gIzF3D+AJJ +7pEN2UrC3UR0xpe2cj2GhndQJ+WsIyft3zpNFzAO13j8ZPibuVP7oDWcW3ixNCWC +213aeRVplJGof8Eo6llDxP+6Fwp1YmOoQmwB1Xm3t4ADn7FLJ14LONLB7q40KviG +RyLyqu3IVOI= +-----END CERTIFICATE REQUEST----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.key b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.key new file mode 100644 index 00000000000..5ab3a3f8b59 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/certs/ldap.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAtDG5//1OelEWrzIF5vNhVj+rwXSrkcnRV/y9DOVy5ZwLZPyF +xv0/uLe5zgScVpk6f8abD7UjACxOHhSaRFBKpaVWxuQ3wc1VnGDlqDHkQHB1gg5M +6z5AeiNHXxdDJuYaGg4Z/mV4wVr6IfyJSXQJz4OZpXo35SQUO8bRQvXjelo5XV7k +Tr8p7RjbjH1S4CEx8F7h9smffBtJhTuk1P/XUWP2dw+kU0synW9e2JP1Zxvq8vdr +ttLIFyAb4bibgt5LFiwK8MBaDJfA1WtOQ+Dth++oKBnQnENCs6gVfET1wId1HID9 +nmbvdKdhBiISJPrGUwpD3eHaK8Yy/Z5EUfOh3QIDAQABAoIBADugMMIKWcuTxYPX +c6iGZHEbxIPRTWyCcalB0nTQAAMGbabPAJ1l8432DZ+kWu806OybFXhPIfPOtVKy +0pFEWE8TtPE/V0vj3C5Qye2sBLFmBRwyCzXUdZV00wseMXRPs9dnTyalAR5KMnbI +j80kfpKSI2dkV9aU57UYBuq3Xrx/TCGItwL769D4ZZW9BvbpiTZApQQFZ0gwUFFn +btPXGU9Ti8H4mfBuZWL+5CaZdqOo76+CXvMPaUK0F9MJp4yX3XxQLRNH3qz/Tyn7 +h7QOOo0XTqoUmzRw0N9QRVH5LRdSE5yq3aF9aFKjNW59exz+62pufOFadngzkpkn +OKCzgWkCgYEA4mOWWMzdYwMn3GtfG7whqlqy7wOmMkNb81zTDQejHBV98dnj0AHr +deurfKWzHrAh3DXo6tFeqUIgXabhBPS/0dEx/S5sgLFmuUZP05EUYahfWBgzzmM9 +C6Oe5xIMLzxsZCJczolsfkEsoFe4o0vkvuLYoQrQL7InzewcDy8cUxsCgYEAy8Na +YCnanSNDY03Bulcni+5sF+opaHseeki1pv3nlw8TwsWuZF9ApS+yL7ck9jJjxBRR 
+RC3KGmpoqIr0vTmUYS946ngQWXPE90zfuhJfM+NRv/q0oCjH0qAcxRbTkls5On9v +oxJ8rO7gD6K85eHqasWdbCVzdZrobOXzay37tmcCgYBfyUUmw190cjReZauzH3Gb +E48b5A5gu/Fe0cqWe8G+szU7rDZgnz9SAGnpbm6QMHPTKZgoKngD42+wUFhq8Wdr +zjh5aDgOZ4EQKTjDSmI2Q7g7nNnmnESK9SrZl+BB6C3wXD2qQaj+7nKEUTlVFlpt +jaucz+dwFtASp7Djl8pDOwKBgEtr2c3ycArt/ImLRIP2spqm+7e2YvFbcSKOOz6+ +iLRvTj8v8KcSYtlB2FC1F6dRa4AujQ4RbNduP6LzHDfWUkfOzJDtNBAIPAXVnJJB +LqAEKkRHRghqT9x0i3GgS1vHDF3MwcO4mhFgserXr9ffUWeIEgbvrdcAKbv1Oa6Y +bK1NAoGAGPm8ISmboDJynjBl9wMrkcy23Pwg9kmyocdWUHh0zMLDKriZNKYB6u/U +C+/RTfkohPoHPzkeqWiHp7z3JhMItYUfTkNW6vMCxEGc0NEN6ZyMIjtiDPGN1n6O +E7jmODFmj1AQICQGdV5SHp+yKvKyb0YHKyDwETbs4SZBXxVvjEw= +-----END RSA PRIVATE KEY----- diff --git a/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/config/export.ldif b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/config/export.ldif new file mode 100644 index 00000000000..c6470176a5e --- /dev/null +++ b/tests/testflows/ldap/role_mapping/configs/ldap5/ldap2/config/export.ldif @@ -0,0 +1,64 @@ +# LDIF Export for dc=company,dc=com +# Server: openldap (openldap) +# Search Scope: sub +# Search Filter: (objectClass=*) +# Total Entries: 7 +# +# Generated by phpLDAPadmin (http://phpldapadmin.sourceforge.net) on May 22, 2020 5:51 pm +# Version: 1.2.5 + +# Entry 1: dc=company,dc=com +#dn: dc=company,dc=com +#dc: company +#o: company +#objectclass: top +#objectclass: dcObject +#objectclass: organization + +# Entry 2: cn=admin,dc=company,dc=com +#dn: cn=admin,dc=company,dc=com +#cn: admin +#description: LDAP administrator +#objectclass: simpleSecurityObject +#objectclass: organizationalRole +#userpassword: {SSHA}eUEupkQCTvq9SkrxfWGSe5rX+orrjVbF + +# Entry 3: ou=groups,dc=company,dc=com +dn: ou=groups,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: groups + +# Entry 4: cn=admin,ou=groups,dc=company,dc=com +dn: cn=admin,ou=groups,dc=company,dc=com +cn: admin +gidnumber: 500 +objectclass: posixGroup +objectclass: top + +# Entry 5: cn=users,ou=groups,dc=company,dc=com +dn: cn=users,ou=groups,dc=company,dc=com +cn: users +gidnumber: 501 +objectclass: posixGroup +objectclass: top + +# Entry 6: ou=users,dc=company,dc=com +dn: ou=users,dc=company,dc=com +objectclass: organizationalUnit +objectclass: top +ou: users + +# Entry 7: cn=user1,ou=users,dc=company,dc=com +dn: cn=user1,ou=users,dc=company,dc=com +cn: user1 +gidnumber: 501 +givenname: John1 +homedirectory: /home/users/user1 +objectclass: inetOrgPerson +objectclass: posixAccount +objectclass: top +sn: User1 +uid: user1 +uidnumber: 1001 +userpassword: user1 diff --git a/tests/testflows/ldap/role_mapping/docker-compose/clickhouse-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/clickhouse-service.yml new file mode 100644 index 00000000000..0789decf022 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/docker-compose/clickhouse-service.yml @@ -0,0 +1,28 @@ +version: '2.3' + +services: + clickhouse: + image: yandex/clickhouse-integration-test + expose: + - "9000" + - "9009" + - "8123" + volumes: + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse/config.d:/etc/clickhouse-server/config.d" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse/users.d/:/etc/clickhouse-server/users.d" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse/ssl:/etc/clickhouse-server/ssl" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse/config.xml:/etc/clickhouse-server/config.xml" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse/users.xml:/etc/clickhouse-server/users.xml" + - "${CLICKHOUSE_TESTS_SERVER_BIN_PATH:-/usr/bin/clickhouse}:/usr/bin/clickhouse" + - 
"${CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH:-/usr/bin/clickhouse-odbc-bridge}:/usr/bin/clickhouse-odbc-bridge" + entrypoint: bash -c "clickhouse server --config-file=/etc/clickhouse-server/config.xml --log-file=/var/log/clickhouse-server/clickhouse-server.log --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" + healthcheck: + test: clickhouse client --query='select 1' + interval: 10s + timeout: 10s + retries: 10 + start_period: 300s + cap_add: + - SYS_PTRACE + security_opt: + - label:disable diff --git a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml new file mode 100644 index 00000000000..c8ff683df58 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml @@ -0,0 +1,162 @@ +version: '2.3' + +services: + openldap1: + # plain text + extends: + file: openldap-service.yml + service: openldap + volumes: + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap1/config:/container/service/slapd/assets/config/bootstrap/ldif/custom" + + openldap2: + # TLS - never + extends: + file: openldap-service.yml + service: openldap + environment: + LDAP_TLS: "true" + LDAP_TLS_CRT_FILENAME: "ldap.crt" + LDAP_TLS_KEY_FILENAME: "ldap.key" + LDAP_TLS_DH_PARAM_FILENAME: "dhparam.pem" + LDAP_TLS_CA_CRT_FILENAME: "ca.crt" + LDAP_TLS_ENFORCE: "false" + LDAP_TLS_VERIFY_CLIENT: "never" + volumes: + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap2/config:/container/service/slapd/assets/config/bootstrap/ldif/custom" + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap2/certs:/container/service/slapd/assets/certs/" + + openldap3: + # plain text - custom port + extends: + file: openldap-service.yml + service: openldap + expose: + - "3089" + environment: + LDAP_PORT: "3089" + volumes: + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap3/config:/container/service/slapd/assets/config/bootstrap/ldif/custom" + + openldap4: + # TLS - never custom port + extends: + file: openldap-service.yml + service: openldap + expose: + - "3089" + - "6036" + environment: + LDAP_PORT: "3089" + LDAPS_PORT: "6036" + LDAP_TLS: "true" + LDAP_TLS_CRT_FILENAME: "ldap.crt" + LDAP_TLS_KEY_FILENAME: "ldap.key" + LDAP_TLS_DH_PARAM_FILENAME: "dhparam.pem" + LDAP_TLS_CA_CRT_FILENAME: "ca.crt" + LDAP_TLS_ENFORCE: "false" + LDAP_TLS_VERIFY_CLIENT: "never" + LDAP_TLS_CIPHER_SUITE: "SECURE256:+SECURE128:-VERS-TLS-ALL:+VERS-TLS1.2:-RSA:-DHE-DSS:-CAMELLIA-128-CBC:-CAMELLIA-256-CBC" + volumes: + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap4/config:/container/service/slapd/assets/config/bootstrap/ldif/custom" + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap4/certs:/container/service/slapd/assets/certs/" + + openldap5: + # TLS - try + extends: + file: openldap-service.yml + service: openldap + environment: + LDAP_TLS: "true" + LDAP_TLS_CRT_FILENAME: "ldap.crt" + LDAP_TLS_KEY_FILENAME: "ldap.key" + LDAP_TLS_DH_PARAM_FILENAME: "dhparam.pem" + LDAP_TLS_CA_CRT_FILENAME: "ca.crt" + LDAP_TLS_ENFORCE: "false" + LDAP_TLS_VERIFY_CLIENT: "try" + volumes: + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap5/config:/container/service/slapd/assets/config/bootstrap/ldif/custom" + - "${CLICKHOUSE_TESTS_DIR}/configs/ldap5/certs:/container/service/slapd/assets/certs/" + + phpldapadmin: + extends: + file: openldap-service.yml + service: phpldapadmin + environment: + PHPLDAPADMIN_LDAP_HOSTS: "openldap1" + depends_on: + openldap1: + condition: service_healthy + + zookeeper: + extends: + file: zookeeper-service.yml + service: zookeeper + + clickhouse1: + extends: + file: clickhouse-service.yml + service: 
clickhouse + hostname: clickhouse1 + volumes: + - "${CLICKHOUSE_TESTS_DIR}/_instances/clickhouse1/database/:/var/lib/clickhouse/" + - "${CLICKHOUSE_TESTS_DIR}/_instances/clickhouse1/logs/:/var/log/clickhouse-server/" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse1/config.d:/etc/clickhouse-server/config.d" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse1/users.d:/etc/clickhouse-server/users.d" + depends_on: + zookeeper: + condition: service_healthy + + clickhouse2: + extends: + file: clickhouse-service.yml + service: clickhouse + hostname: clickhouse2 + volumes: + - "${CLICKHOUSE_TESTS_DIR}/_instances/clickhouse2/database/:/var/lib/clickhouse/" + - "${CLICKHOUSE_TESTS_DIR}/_instances/clickhouse2/logs/:/var/log/clickhouse-server/" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse2/config.d:/etc/clickhouse-server/config.d" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse2/users.d:/etc/clickhouse-server/users.d" + depends_on: + zookeeper: + condition: service_healthy + + clickhouse3: + extends: + file: clickhouse-service.yml + service: clickhouse + hostname: clickhouse3 + volumes: + - "${CLICKHOUSE_TESTS_DIR}/_instances/clickhouse3/database/:/var/lib/clickhouse/" + - "${CLICKHOUSE_TESTS_DIR}/_instances/clickhouse3/logs/:/var/log/clickhouse-server/" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse3/config.d:/etc/clickhouse-server/config.d" + - "${CLICKHOUSE_TESTS_DIR}/configs/clickhouse3/users.d:/etc/clickhouse-server/users.d" + depends_on: + zookeeper: + condition: service_healthy + + # dummy service which does nothing, but allows to postpone + # 'docker-compose up -d' till all dependecies will go healthy + all_services_ready: + image: hello-world + depends_on: + clickhouse1: + condition: service_healthy + clickhouse2: + condition: service_healthy + clickhouse3: + condition: service_healthy + zookeeper: + condition: service_healthy + openldap1: + condition: service_healthy + openldap2: + condition: service_healthy + openldap3: + condition: service_healthy + openldap4: + condition: service_healthy + openldap5: + condition: service_healthy + phpldapadmin: + condition: service_healthy diff --git a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml new file mode 100644 index 00000000000..139907c513c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml @@ -0,0 +1,40 @@ +version: '2.3' + +services: + openldap: + image: osixia/openldap:1.4.0 + command: "--copy-service --loglevel debug" + environment: + LDAP_ORGANIZATION: "company" + LDAP_DOMAIN: "company.com" + LDAP_ADMIN_PASSWORD: "admin" + LDAP_TLS: "false" + expose: + - "389" + - "636" + healthcheck: + test: ldapsearch -x -H ldap://localhost:$${LDAP_PORT:-389} -b "dc=company,dc=com" -D "cn=admin,dc=company,dc=com" -w admin + interval: 10s + timeout: 10s + retries: 3 + start_period: 300s + security_opt: + - label:disable + + + phpldapadmin: + image: osixia/phpldapadmin:0.9.0 + container_name: phpldapadmin + environment: + PHPLDAPADMIN_HTTPS=false: + ports: + - "8080:80" + healthcheck: + test: echo 1 + interval: 10s + timeout: 10s + retries: 3 + start_period: 300s + security_opt: + - label:disable + diff --git a/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml new file mode 100644 index 00000000000..6691a2df31c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml @@ -0,0 +1,18 @@ +version: '2.3' 
+ +services: + zookeeper: + image: zookeeper:3.4.12 + expose: + - "2181" + environment: + ZOO_TICK_TIME: 500 + ZOO_MY_ID: 1 + healthcheck: + test: echo stat | nc localhost 2181 + interval: 10s + timeout: 10s + retries: 3 + start_period: 300s + security_opt: + - label:disable diff --git a/tests/testflows/ldap/role_mapping/regression.py b/tests/testflows/ldap/role_mapping/regression.py new file mode 100755 index 00000000000..fff1e72a945 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/regression.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +import sys +from testflows.core import * + +append_path(sys.path, "..", "..") + +from helpers.cluster import Cluster +from helpers.argparser import argparser +from ldap.role_mapping.requirements import * + +# Cross-outs of known fails +xfails = { + "mapping/roles removed and added in parallel": + [(Fail, "known bug")] +} + +@TestFeature +@Name("role mapping") +@ArgumentParser(argparser) +@Specifications( + QA_SRS014_ClickHouse_LDAP_Role_Mapping +) +@Requirements( + RQ_SRS_014_LDAP_RoleMapping("1.0") +) +@XFails(xfails) +def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): + """ClickHouse LDAP role mapping regression module. + """ + nodes = { + "clickhouse": ("clickhouse1", "clickhouse2", "clickhouse3"), + } + + with Cluster(local, clickhouse_binary_path, nodes=nodes) as cluster: + self.context.cluster = cluster + + if stress is not None or not hasattr(self.context, "stress"): + self.context.stress = stress + if parallel is not None or not hasattr(self.context, "parallel"): + self.context.parallel = parallel + + Scenario(run=load("ldap.authentication.tests.sanity", "scenario"), name="ldap sanity") + Feature(run=load("ldap.role_mapping.tests.server_config", "feature")) + Feature(run=load("ldap.role_mapping.tests.mapping", "feature")) + +if main(): + regression() diff --git a/tests/testflows/ldap/role_mapping/requirements/__init__.py b/tests/testflows/ldap/role_mapping/requirements/__init__.py new file mode 100644 index 00000000000..02f7d430154 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/requirements/__init__.py @@ -0,0 +1 @@ +from .requirements import * diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.py b/tests/testflows/ldap/role_mapping/requirements/requirements.py new file mode 100644 index 00000000000..ca7192e9dad --- /dev/null +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.py @@ -0,0 +1,1475 @@ +# These requirements were auto generated +# from software requirements specification (SRS) +# document by TestFlows v1.6.210101.1235930. +# Do not edit by hand but re-generate instead +# using 'tfs requirements generate' command. 
+from testflows.core import Specification +from testflows.core import Requirement + +Heading = Specification.Heading + +RQ_SRS_014_LDAP_RoleMapping = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles\n' + 'for users authenticated using [LDAP] external user directory.\n' + '\n' + ), + link=None, + level=3, + num='4.1.1') + +RQ_SRS_014_LDAP_RoleMapping_WithFixedRoles = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles\n' + 'for users authenticated using [LDAP] external user directory when\n' + 'one or more roles are specified in the `` section.\n' + '\n' + ), + link=None, + level=3, + num='4.1.2') + +RQ_SRS_014_LDAP_RoleMapping_Search = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Search', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL perform search on the [LDAP] server and map the results to [RBAC] role names \n' + 'when authenticating users using the [LDAP] external user directory if the `` section is configured\n' + 'as part of the [LDAP] external user directory. The matched roles SHALL be assigned to the user.\n' + '\n' + ), + link=None, + level=3, + num='4.1.3') + +RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithUTF8Characters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory\n' + 'to an [RBAC] role that contains UTF-8 characters.\n' + '\n' + ), + link=None, + level=3, + num='4.2.1') + +RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_Long = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory\n' + 'to an [RBAC] role that has a name with more than 128 characters.\n' + '\n' + ), + link=None, + level=3, + num='4.2.2') + +RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithSpecialXMLCharacters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory\n' + 'to an [RBAC] role that has a name that contains special characters that need to be escaped in XML.\n' + '\n' + ), + link=None, + level=3, + num='4.2.3') + +RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithSpecialRegexCharacters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory\n' + 'to an [RBAC] role that has a name that contains special characters that need to be escaped in regex.\n' + '\n' + ), + link=None, + level=3, + num='4.2.4') + +RQ_SRS_014_LDAP_RoleMapping_Map_MultipleRoles = Requirement( + 
name='RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support mapping one or more [LDAP] search results for users authenticated using \n' + '[LDAP] external user directory to one or more [RBAC] role.\n' + '\n' + ), + link=None, + level=3, + num='4.3.1') + +RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_Removed = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL not assign [RBAC] role(s) for any users authenticated using [LDAP] external user directory\n' + 'if the corresponding [LDAP] group(s) that map those role(s) are removed. Any users that have active sessions SHALL still\n' + 'have privileges provided by the role(s) until the next time they are authenticated.\n' + '\n' + ), + link=None, + level=3, + num='4.4.1') + +RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_RemovedAndAdded_Parallel = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support authenticating users using [LDAP] external user directory \n' + 'when [LDAP] groups are removed and added \n' + 'at the same time as [LDAP] user authentications are performed in parallel.\n' + '\n' + ), + link=None, + level=3, + num='4.4.2') + +RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_UserRemoved = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL not assign [RBAC] role(s) for the user authenticated using [LDAP] external user directory\n' + 'if the user has been removed from the corresponding [LDAP] group(s) that map those role(s). 
\n' + 'Any active user sessions SHALL have privileges provided by the role(s) until the next time the user is authenticated.\n' + '\n' + ), + link=None, + level=3, + num='4.4.3') + +RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_UserRemovedAndAdded_Parallel = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support authenticating users using [LDAP] external user directory\n' + 'when [LDAP] users are added and removed from [LDAP] groups used to map to [RBAC] roles\n' + 'at the same time as [LDAP] user authentications are performed in parallel.\n' + '\n' + ), + link=None, + level=3, + num='4.4.4') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_NotPresent = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL not reject authentication attempt using [LDAP] external user directory if any of the roles that are \n' + 'are mapped from [LDAP] but are not present locally.\n' + '\n' + ), + link=None, + level=3, + num='4.5.1') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Added = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL add the privileges provided by the [LDAP] mapped role when the\n' + 'role is not present during user authentication using [LDAP] external user directory\n' + 'as soon as the role is added.\n' + '\n' + ), + link=None, + level=3, + num='4.5.2') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Removed = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL remove the privileges provided by the role from all the\n' + 'users authenticated using [LDAP] external user directory if the [RBAC] role that was mapped\n' + 'as a result of [LDAP] search is removed.\n' + '\n' + ), + link=None, + level=3, + num='4.5.3') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Readded = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL reassign the [RBAC] role and add all the privileges provided by the role\n' + 'when it is re-added after removal for all [LDAP] users authenticated using external user directory\n' + 'for any role that was mapped as a result of [LDAP] search.\n' + '\n' + ), + link=None, + level=3, + num='4.5.4') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_RemovedAndAdded_Parallel = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support authenticating users using [LDAP] external user directory\n' + 'when [RBAC] roles that are mapped by [LDAP] groups\n' + 'are added and removed at the same time as [LDAP] user authentications are performed in parallel.\n' + '\n' + ), + link=None, + level=3, + num='4.5.5') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_New = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL not allow any new roles to be assigned to any\n' + 'users authenticated using [LDAP] external user 
directory unless the role is specified\n' + 'in the configuration of the external user directory or was mapped as a result of [LDAP] search.\n' + '\n' + ), + link=None, + level=3, + num='4.5.6') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_NewPrivilege = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL add new privilege to all the users authenticated using [LDAP] external user directory\n' + 'when new privilege is added to one of the roles that were mapped as a result of [LDAP] search.\n' + '\n' + ), + link=None, + level=3, + num='4.5.7') + +RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_RemovedPrivilege = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL remove privilege from all the users authenticated using [LDAP] external user directory\n' + 'when the privilege that was provided by the mapped role is removed from all the roles \n' + 'that were mapped as a result of [LDAP] search.\n' + '\n' + ), + link=None, + level=3, + num='4.5.8') + +RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support parallel authentication of users using [LDAP] server\n' + 'when using [LDAP] external user directory that has role mapping enabled.\n' + '\n' + ), + link=None, + level=3, + num='4.6.1') + +RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_ValidAndInvalid = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support authentication of valid users and\n' + 'prohibit authentication of invalid users using [LDAP] server\n' + 'in parallel without having invalid attempts affecting valid authentications\n' + 'when using [LDAP] external user directory that has role mapping enabled.\n' + '\n' + ), + link=None, + level=3, + num='4.6.2') + +RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_MultipleServers = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support parallel authentication of external [LDAP] users\n' + 'authenticated using multiple [LDAP] external user directories that have\n' + 'role mapping enabled.\n' + '\n' + ), + link=None, + level=3, + num='4.6.3') + +RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_LocalOnly = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support parallel authentication of users defined only locally\n' + 'when one or more [LDAP] external user directories with role mapping\n' + 'are specified in the configuration file.\n' + '\n' + ), + link=None, + level=3, + num='4.6.4') + +RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_LocalAndMultipleLDAP = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support parallel 
authentication of local and external [LDAP] users\n' + 'authenticated using multiple [LDAP] external user directories with role mapping enabled.\n' + '\n' + ), + link=None, + level=3, + num='4.6.5') + +RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_SameUser = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support parallel authentication of the same external [LDAP] user\n' + 'authenticated using the same [LDAP] external user directory with role mapping enabled.\n' + '\n' + ), + link=None, + level=3, + num='4.6.6') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `` parameter in the `` section\n' + 'of the `config.xml` that SHALL be used to construct the `DN` to bind to.\n' + 'The resulting `DN` SHALL be constructed by replacing all `{user_name}` substrings of the template \n' + 'with the actual user name during each authentication attempt.\n' + '\n' + 'For example, \n' + '\n' + '```xml\n' + '\n' + ' \n' + ' \n' + ' \n' + ' uid={user_name},ou=users,dc=example,dc=com\n' + ' \n' + ' \n' + ' \n' + '\n' + '```\n' + '\n' + ), + link=None, + level=4, + num='4.7.1.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN_ConflictWith_AuthDN = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL return an error if both `` and `` or `` parameters\n' + 'are specified as part of [LDAP] server description in the `` section of the `config.xml`.\n' + '\n' + ), + link=None, + level=4, + num='4.7.1.2') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Syntax = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `role_mapping` sub-section in the `` section\n' + 'of the `config.xml`.\n' + '\n' + 'For example,\n' + '\n' + '```xml\n' + '\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' ou=groups,dc=example,dc=com\n' + ' cn\n' + ' subtree\n' + ' (&(objectClass=groupOfNames)(member={bind_dn}))\n' + ' clickhouse_\n' + ' \n' + ' \n' + ' \n' + '\n' + '```\n' + '\n' + ), + link=None, + level=4, + num='4.8.1.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SpecialCharactersEscaping = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support properly escaped special XML characters that can be present\n' + 'as part of the values for different configuration parameters inside the\n' + '`` section of the `config.xml` such as\n' + '\n' + '* `` parameter\n' + '* `` parameter\n' + '\n' + ), + link=None, + level=4, + num='4.8.2.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] 
SHALL support multiple `` sections defined inside the same `` section \n' + 'of the `config.xml` and all of the `` sections SHALL be applied.\n' + '\n' + ), + link=None, + level=4, + num='4.8.3.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections_IdenticalParameters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL not duplicate mapped roles when multiple `` sections \n' + 'with identical parameters are defined inside the `` section \n' + 'of the `config.xml`.\n' + '\n' + ), + link=None, + level=4, + num='4.8.3.2') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_BaseDN = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `` parameter in the `` section \n' + 'of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search.\n' + '\n' + 'The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of \n' + 'the template with the actual user name and bind `DN` during each [LDAP] search.\n' + '\n' + ), + link=None, + level=4, + num='4.8.4.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Attribute = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `` parameter in the `` section of \n' + 'the `config.xml` that SHALL specify the name of the attribute whose values SHALL be returned by the [LDAP] search.\n' + '\n' + ), + link=None, + level=4, + num='4.8.5.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `` parameter in the `` section of \n' + 'the `config.xml` that SHALL define the scope of the LDAP search as defined \n' + 'by the https://ldapwiki.com/wiki/LDAP%20Search%20Scopes.\n' + '\n' + ), + link=None, + level=4, + num='4.8.6.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Base = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `base` value for the the `` parameter in the \n' + '`` section of the `config.xml` that SHALL\n' + 'limit the scope as specified by the https://ldapwiki.com/wiki/BaseObject.\n' + '\n' + ), + link=None, + level=4, + num='4.8.6.2') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_OneLevel = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `one_level` value for the the `` parameter in the \n' + '`` section of the `config.xml` that SHALL\n' + 'limit the scope as specified by the 
https://ldapwiki.com/wiki/SingleLevel.\n' + '\n' + ), + link=None, + level=4, + num='4.8.6.3') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Children = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `children` value for the the `` parameter in the \n' + '`` section of the `config.xml` that SHALL\n' + 'limit the scope as specified by the https://ldapwiki.com/wiki/SubordinateSubtree.\n' + '\n' + ), + link=None, + level=4, + num='4.8.6.4') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Subtree = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `children` value for the the `` parameter in the \n' + '`` section of the `config.xml` that SHALL\n' + 'limit the scope as specified by the https://ldapwiki.com/wiki/WholeSubtree.\n' + '\n' + ), + link=None, + level=4, + num='4.8.6.5') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Default = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `subtree` as the default value for the the `` parameter in the \n' + '`` section of the `config.xml` when the `` parameter is not specified.\n' + '\n' + ), + link=None, + level=4, + num='4.8.6.6') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SearchFilter = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `` parameter in the ``\n' + 'section of the `config.xml` that SHALL specify the template used to construct \n' + 'the [LDAP filter](https://ldap.com/ldap-filters/) for the search.\n' + '\n' + 'The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings \n' + 'of the template with the actual user name, bind `DN`, and base `DN` during each the [LDAP] search.\n' + ' \n' + ), + link=None, + level=4, + num='4.8.7.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `` parameter in the ``\n' + 'section of the `config.xml` that SHALL be expected to be in front of each string in \n' + 'the original list of strings returned by the [LDAP] search. \n' + 'Prefix SHALL be removed from the original strings and resulting strings SHALL be treated as [RBAC] role names. 
\n' + '\n' + ), + link=None, + level=4, + num='4.8.8.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_Default = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support empty string as the default value of the `` parameter in \n' + 'the `` section of the `config.xml`.\n' + '\n' + ), + link=None, + level=4, + num='4.8.8.2') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithUTF8Characters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support UTF8 characters as the value of the `` parameter in\n' + 'the `` section of the `config.xml`.\n' + '\n' + ), + link=None, + level=4, + num='4.8.8.3') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialXMLCharacters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support XML special characters as the value of the `` parameter in\n' + 'the `` section of the `config.xml`.\n' + '\n' + ), + link=None, + level=4, + num='4.8.8.4') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialRegexCharacters = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support regex special characters as the value of the `` parameter in\n' + 'the `` section of the `config.xml`.\n' + '\n' + ), + link=None, + level=4, + num='4.8.8.5') + +QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( + name='QA-SRS014 ClickHouse LDAP Role Mapping', + description=None, + author='vzakaznikov', + date='December 4, 2020', + status='-', + approved_by='-', + approved_date='-', + approved_version='-', + version=None, + group=None, + type=None, + link=None, + uid=None, + parent=None, + children=None, + headings=( + Heading(name='Revision History', level=1, num='1'), + Heading(name='Introduction', level=1, num='2'), + Heading(name='Terminology', level=1, num='3'), + Heading(name='LDAP', level=2, num='3.1'), + Heading(name='Requirements', level=1, num='4'), + Heading(name='General', level=2, num='4.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping', level=3, num='4.1.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles', level=3, num='4.1.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Search', level=3, num='4.1.3'), + Heading(name='Mapped Role Names', level=2, num='4.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters', level=3, num='4.2.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long', level=3, num='4.2.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters', level=3, num='4.2.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters', level=3, num='4.2.4'), + Heading(name='Multiple Roles', level=2, num='4.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles', level=3, num='4.3.1'), + Heading(name='LDAP Groups', 
level=2, num='4.4'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed', level=3, num='4.4.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel', level=3, num='4.4.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved', level=3, num='4.4.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel', level=3, num='4.4.4'), + Heading(name='RBAC Roles', level=2, num='4.5'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent', level=3, num='4.5.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added', level=3, num='4.5.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed', level=3, num='4.5.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded', level=3, num='4.5.4'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel', level=3, num='4.5.5'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New', level=3, num='4.5.6'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege', level=3, num='4.5.7'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege', level=3, num='4.5.8'), + Heading(name='Authentication', level=2, num='4.6'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel', level=3, num='4.6.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid', level=3, num='4.6.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers', level=3, num='4.6.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly', level=3, num='4.6.4'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP', level=3, num='4.6.5'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser', level=3, num='4.6.6'), + Heading(name='Server Configuration', level=2, num='4.7'), + Heading(name='BindDN Parameter', level=3, num='4.7.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN', level=4, num='4.7.1.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN', level=4, num='4.7.1.2'), + Heading(name='External User Directory Configuration', level=2, num='4.8'), + Heading(name='Syntax', level=3, num='4.8.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax', level=4, num='4.8.1.1'), + Heading(name='Special Characters Escaping', level=3, num='4.8.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping', level=4, num='4.8.2.1'), + Heading(name='Multiple Sections', level=3, num='4.8.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections', level=4, num='4.8.3.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters', level=4, num='4.8.3.2'), + Heading(name='BaseDN Parameter', level=3, num='4.8.4'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN', level=4, num='4.8.4.1'), + Heading(name='Attribute Parameter', level=3, num='4.8.5'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute', level=4, num='4.8.5.1'), + Heading(name='Scope Parameter', level=3, num='4.8.6'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope', level=4, num='4.8.6.1'), + 
Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base', level=4, num='4.8.6.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel', level=4, num='4.8.6.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children', level=4, num='4.8.6.4'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree', level=4, num='4.8.6.5'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default', level=4, num='4.8.6.6'), + Heading(name='Search Filter Parameter', level=3, num='4.8.7'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter', level=4, num='4.8.7.1'), + Heading(name='Prefix Parameter', level=3, num='4.8.8'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix', level=4, num='4.8.8.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default', level=4, num='4.8.8.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters', level=4, num='4.8.8.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters', level=4, num='4.8.8.4'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters', level=4, num='4.8.8.5'), + Heading(name='References', level=1, num='5'), + ), + requirements=( + RQ_SRS_014_LDAP_RoleMapping, + RQ_SRS_014_LDAP_RoleMapping_WithFixedRoles, + RQ_SRS_014_LDAP_RoleMapping_Search, + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithUTF8Characters, + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_Long, + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithSpecialXMLCharacters, + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithSpecialRegexCharacters, + RQ_SRS_014_LDAP_RoleMapping_Map_MultipleRoles, + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_Removed, + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_RemovedAndAdded_Parallel, + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_UserRemoved, + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_UserRemovedAndAdded_Parallel, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_NotPresent, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Added, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Removed, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Readded, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_RemovedAndAdded_Parallel, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_New, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_NewPrivilege, + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_RemovedPrivilege, + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel, + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_ValidAndInvalid, + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_MultipleServers, + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_LocalOnly, + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_LocalAndMultipleLDAP, + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_SameUser, + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN, + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN_ConflictWith_AuthDN, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Syntax, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SpecialCharactersEscaping, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections, + 
RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections_IdenticalParameters, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_BaseDN, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Attribute, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Base, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_OneLevel, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Children, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Subtree, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Scope_Value_Default, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SearchFilter, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_Default, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithUTF8Characters, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialXMLCharacters, + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialRegexCharacters, + ), + content=''' +# QA-SRS014 ClickHouse LDAP Role Mapping +# Software Requirements Specification + +(c) 2020 Altinity LTD. All Rights Reserved. + +**Document status:** Confidential + +**Author:** vzakaznikov + +**Date:** December 4, 2020 + +## Approval + +**Status:** - + +**Version:** - + +**Approved by:** - + +**Date:** - + +## Table of Contents + +* 1 [Revision History](#revision-history) +* 2 [Introduction](#introduction) +* 3 [Terminology](#terminology) + * 3.1 [LDAP](#ldap) +* 4 [Requirements](#requirements) + * 4.1 [General](#general) + * 4.1.1 [RQ.SRS-014.LDAP.RoleMapping](#rqsrs-014ldaprolemapping) + * 4.1.2 [RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles](#rqsrs-014ldaprolemappingwithfixedroles) + * 4.1.3 [RQ.SRS-014.LDAP.RoleMapping.Search](#rqsrs-014ldaprolemappingsearch) + * 4.2 [Mapped Role Names](#mapped-role-names) + * 4.2.1 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters](#rqsrs-014ldaprolemappingmaprolenamewithutf8characters) + * 4.2.2 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long](#rqsrs-014ldaprolemappingmaprolenamelong) + * 4.2.3 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialxmlcharacters) + * 4.2.4 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialregexcharacters) + * 4.3 [Multiple Roles](#multiple-roles) + * 4.3.1 [RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles](#rqsrs-014ldaprolemappingmapmultipleroles) + * 4.4 [LDAP Groups](#ldap-groups) + * 4.4.1 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed](#rqsrs-014ldaprolemappingldapgroupremoved) + * 4.4.2 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupremovedandaddedparallel) + * 4.4.3 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved](#rqsrs-014ldaprolemappingldapgroupuserremoved) + * 4.4.4 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupuserremovedandaddedparallel) + * 4.5 [RBAC Roles](#rbac-roles) + * 4.5.1 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent](#rqsrs-014ldaprolemappingrbacrolenotpresent) + * 4.5.2 
[RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added](#rqsrs-014ldaprolemappingrbacroleadded) + * 4.5.3 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed](#rqsrs-014ldaprolemappingrbacroleremoved) + * 4.5.4 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded](#rqsrs-014ldaprolemappingrbacrolereadded) + * 4.5.5 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingrbacroleremovedandaddedparallel) + * 4.5.6 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New](#rqsrs-014ldaprolemappingrbacrolenew) + * 4.5.7 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege](#rqsrs-014ldaprolemappingrbacrolenewprivilege) + * 4.5.8 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege](#rqsrs-014ldaprolemappingrbacroleremovedprivilege) + * 4.6 [Authentication](#authentication) + * 4.6.1 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel](#rqsrs-014ldaprolemappingauthenticationparallel) + * 4.6.2 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid](#rqsrs-014ldaprolemappingauthenticationparallelvalidandinvalid) + * 4.6.3 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers](#rqsrs-014ldaprolemappingauthenticationparallelmultipleservers) + * 4.6.4 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly](#rqsrs-014ldaprolemappingauthenticationparallellocalonly) + * 4.6.5 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP](#rqsrs-014ldaprolemappingauthenticationparallellocalandmultipleldap) + * 4.6.6 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser](#rqsrs-014ldaprolemappingauthenticationparallelsameuser) + * 4.7 [Server Configuration](#server-configuration) + * 4.7.1 [BindDN Parameter](#binddn-parameter) + * 4.7.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN](#rqsrs-014ldaprolemappingconfigurationserverbinddn) + * 4.7.1.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN](#rqsrs-014ldaprolemappingconfigurationserverbinddnconflictwithauthdn) + * 4.8 [External User Directory Configuration](#external-user-directory-configuration) + * 4.8.1 [Syntax](#syntax) + * 4.8.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsyntax) + * 4.8.2 [Special Characters Escaping](#special-characters-escaping) + * 4.8.2.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingspecialcharactersescaping) + * 4.8.3 [Multiple Sections](#multiple-sections) + * 4.8.3.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesections) + * 4.8.3.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesectionsidenticalparameters) + * 4.8.4 [BaseDN Parameter](#basedn-parameter) + * 4.8.4.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingbasedn) + * 4.8.5 [Attribute Parameter](#attribute-parameter) + * 4.8.5.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingattribute) + * 4.8.6 [Scope Parameter](#scope-parameter) + * 4.8.6.1 
[RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscope) + * 4.8.6.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluebase) + * 4.8.6.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevalueonelevel) + * 4.8.6.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluechildren) + * 4.8.6.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluesubtree) + * 4.8.6.6 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluedefault) + * 4.8.7 [Search Filter Parameter](#search-filter-parameter) + * 4.8.7.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsearchfilter) + * 4.8.8 [Prefix Parameter](#prefix-parameter) + * 4.8.8.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefix) + * 4.8.8.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixdefault) + * 4.8.8.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithutf8characters) + * 4.8.8.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialxmlcharacters) + * 4.8.8.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialregexcharacters) +* 5 [References](#references) + +## Revision History + +This document is stored in an electronic form using [Git] source control management software +hosted in a [GitLab Repository]. +All the updates are tracked using the [Revision History]. + +## Introduction + +The [QA-SRS007 ClickHouse Authentication of Users via LDAP] added support for authenticating +users using an [LDAP] server and the [QA-SRS009 ClickHouse LDAP External User Directory] added +support for authenticating users using an [LDAP] external user directory. + +This requirements specification adds additional functionality for mapping [LDAP] groups to +the corresponding [ClickHouse] [RBAC] roles when [LDAP] external user directory is configured. +This functionality will enable easier access management for [LDAP] authenticated users +as the privileges granted by the roles can be granted or revoked by granting or revoking +a corresponding [LDAP] group to one or more [LDAP] users. + +For the use case when only [LDAP] user authentication is used, the roles can be +managed using [RBAC] in the same way as for non-[LDAP] authenticated users. 
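For illustration, role mapping is configured entirely through the `<user_directories>` section of `config.xml`. Below is a hypothetical sketch of building such a configuration with the `create_ldap_external_user_directory_config_content` test helper added by this patch; the server name, base DN, search filter, and prefix are example values only.

```python
# Hypothetical usage of the create_ldap_external_user_directory_config_content()
# helper added by this patch; the server name, base DN, search filter, and
# prefix below are example values only.
from ldap.role_mapping.tests.common import create_ldap_external_user_directory_config_content

role_mappings = [
    {
        "base_dn": "ou=groups,dc=company,dc=com",
        "attribute": "cn",
        "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))",
        "prefix": "clickhouse_",
    }
]

# Builds the XML content for the <user_directories> section with one <ldap>
# entry that contains a <role_mapping> sub-section per dictionary above.
config = create_ldap_external_user_directory_config_content(
    server="openldap1", role_mappings=role_mappings)
```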
+ +## Terminology + +### LDAP + +* Lightweight Directory Access Protocol + +## Requirements + +### General + +#### RQ.SRS-014.LDAP.RoleMapping +version: 1.0 + +[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles +for users authenticated using [LDAP] external user directory. + +#### RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles +version: 1.0 + +[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles +for users authenticated using [LDAP] external user directory when +one or more roles are specified in the `` section. + +#### RQ.SRS-014.LDAP.RoleMapping.Search +version: 1.0 + +[ClickHouse] SHALL perform search on the [LDAP] server and map the results to [RBAC] role names +when authenticating users using the [LDAP] external user directory if the `` section is configured +as part of the [LDAP] external user directory. The matched roles SHALL be assigned to the user. + +### Mapped Role Names + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that contains UTF-8 characters. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name with more than 128 characters. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name that contains special characters that need to be escaped in XML. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name that contains special characters that need to be escaped in regex. + +### Multiple Roles + +#### RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles +version: 1.0 + +[ClickHouse] SHALL support mapping one or more [LDAP] search results for users authenticated using +[LDAP] external user directory to one or more [RBAC] role. + +### LDAP Groups + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed +version: 1.0 + +[ClickHouse] SHALL not assign [RBAC] role(s) for any users authenticated using [LDAP] external user directory +if the corresponding [LDAP] group(s) that map those role(s) are removed. Any users that have active sessions SHALL still +have privileges provided by the role(s) until the next time they are authenticated. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [LDAP] groups are removed and added +at the same time as [LDAP] user authentications are performed in parallel. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved +version: 1.0 + +[ClickHouse] SHALL not assign [RBAC] role(s) for the user authenticated using [LDAP] external user directory +if the user has been removed from the corresponding [LDAP] group(s) that map those role(s). +Any active user sessions SHALL have privileges provided by the role(s) until the next time the user is authenticated. 
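For illustration only, a check of the two requirements above could remove a user from the mapped [LDAP] group and then verify that the corresponding role is no longer assigned on the next authentication. The sketch below uses the `delete_user_from_group_in_ldap` helper added by this patch; the user dictionary keys, the role name, and the query settings are assumptions, not part of this specification.

```python
# Illustrative sketch only. delete_user_from_group_in_ldap() is added by this
# patch; the user dictionary keys, the role name, and the query settings used
# to authenticate via the LDAP external user directory are assumptions.
from testflows.core import TestScenario, When, Then, current
from testflows.asserts import error

from ldap.role_mapping.tests.common import delete_user_from_group_in_ldap

@TestScenario
def role_not_assigned_after_user_removed_from_group(self, ldap_user, group, role_name="mapped_role"):
    """Check that a mapped role is no longer assigned after the user
    is removed from the corresponding LDAP group (sketch).
    """
    node = current().context.node

    with When("I remove the user from the LDAP group that maps to the role"):
        delete_user_from_group_in_ldap(user=ldap_user, group=group)

    with Then("the mapped role is not assigned on the next authentication"):
        r = node.query("SHOW GRANTS", settings=[
            ("user", ldap_user["username"]), ("password", ldap_user["userpassword"])])
        assert role_name not in r.output, error()
```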
+ +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [LDAP] users are added and removed from [LDAP] groups used to map to [RBAC] roles +at the same time as [LDAP] user authentications are performed in parallel. + +### RBAC Roles + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent +version: 1.0 + +[ClickHouse] SHALL not reject authentication attempt using [LDAP] external user directory if any of the roles that are +are mapped from [LDAP] but are not present locally. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added +version: 1.0 + +[ClickHouse] SHALL add the privileges provided by the [LDAP] mapped role when the +role is not present during user authentication using [LDAP] external user directory +as soon as the role is added. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed +version: 1.0 + +[ClickHouse] SHALL remove the privileges provided by the role from all the +users authenticated using [LDAP] external user directory if the [RBAC] role that was mapped +as a result of [LDAP] search is removed. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded +version: 1.0 + +[ClickHouse] SHALL reassign the [RBAC] role and add all the privileges provided by the role +when it is re-added after removal for all [LDAP] users authenticated using external user directory +for any role that was mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [RBAC] roles that are mapped by [LDAP] groups +are added and removed at the same time as [LDAP] user authentications are performed in parallel. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New +version: 1.0 + +[ClickHouse] SHALL not allow any new roles to be assigned to any +users authenticated using [LDAP] external user directory unless the role is specified +in the configuration of the external user directory or was mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege +version: 1.0 + +[ClickHouse] SHALL add new privilege to all the users authenticated using [LDAP] external user directory +when new privilege is added to one of the roles that were mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege +version: 1.0 + +[ClickHouse] SHALL remove privilege from all the users authenticated using [LDAP] external user directory +when the privilege that was provided by the mapped role is removed from all the roles +that were mapped as a result of [LDAP] search. + +### Authentication + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users using [LDAP] server +when using [LDAP] external user directory that has role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid +version: 1.0 + +[ClickHouse] SHALL support authentication of valid users and +prohibit authentication of invalid users using [LDAP] server +in parallel without having invalid attempts affecting valid authentications +when using [LDAP] external user directory that has role mapping enabled. 
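For illustration only, the parallel authentication scenarios above can be exercised with a thread pool that mixes valid and invalid logins. The sketch below mirrors the helpers imported by `tests/mapping.py` in this patch; the pool size, iteration counts, timeout, and helper call signatures are assumptions.

```python
# Illustrative sketch only. Pool size, iteration counts, timeout, and the
# call signatures of the login_* helpers and join() are assumptions based on
# how mapping.py in this patch uses them.
from multiprocessing.dummy import Pool

from ldap.external_user_directory.tests.common import join
from ldap.external_user_directory.tests.authentications import (
    login_with_valid_username_and_password,
    login_with_valid_username_and_invalid_password,
    login_with_invalid_username_and_valid_password,
)

def check_parallel_valid_and_invalid_logins(users, timeout=300):
    """Run valid and invalid login attempts in parallel (sketch)."""
    pool = Pool(4)
    tasks = []
    try:
        for i in range(5):
            tasks.append(pool.apply_async(login_with_valid_username_and_password, (users, i, 50)))
            tasks.append(pool.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50)))
            tasks.append(pool.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50)))
    finally:
        join(tasks, timeout)
```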
+ +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of external [LDAP] users +authenticated using multiple [LDAP] external user directories that have +role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users defined only locally +when one or more [LDAP] external user directories with role mapping +are specified in the configuration file. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of local and external [LDAP] users +authenticated using multiple [LDAP] external user directories with role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of the same external [LDAP] user +authenticated using the same [LDAP] external user directory with role mapping enabled. + +### Server Configuration + +#### BindDN Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN +version: 1.0 + +[ClickHouse] SHALL support the `` parameter in the `` section +of the `config.xml` that SHALL be used to construct the `DN` to bind to. +The resulting `DN` SHALL be constructed by replacing all `{user_name}` substrings of the template +with the actual user name during each authentication attempt. + +For example, + +```xml + + + + + uid={user_name},ou=users,dc=example,dc=com + + + + +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN +version: 1.0 + +[ClickHouse] SHALL return an error if both `` and `` or `` parameters +are specified as part of [LDAP] server description in the `` section of the `config.xml`. + +### External User Directory Configuration + +#### Syntax + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax +version: 1.0 + +[ClickHouse] SHALL support the `role_mapping` sub-section in the `` section +of the `config.xml`. + +For example, + +```xml + + + + + + ou=groups,dc=example,dc=com + cn + subtree + (&(objectClass=groupOfNames)(member={bind_dn})) + clickhouse_ + + + + +``` + +#### Special Characters Escaping + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping +version: 1.0 + +[ClickHouse] SHALL support properly escaped special XML characters that can be present +as part of the values for different configuration parameters inside the +`` section of the `config.xml` such as + +* `` parameter +* `` parameter + +#### Multiple Sections + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections +version: 1.0 + +[ClickHouse] SHALL support multiple `` sections defined inside the same `` section +of the `config.xml` and all of the `` sections SHALL be applied. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters +version: 1.0 + +[ClickHouse] SHALL not duplicate mapped roles when multiple `` sections +with identical parameters are defined inside the `` section +of the `config.xml`. + +#### BaseDN Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN +version: 1.0 + +[ClickHouse] SHALL support the `` parameter in the `` section +of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search. 
+ +The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of +the template with the actual user name and bind `DN` during each [LDAP] search. + +#### Attribute Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute +version: 1.0 + +[ClickHouse] SHALL support the `` parameter in the `` section of +the `config.xml` that SHALL specify the name of the attribute whose values SHALL be returned by the [LDAP] search. + +#### Scope Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope +version: 1.0 + +[ClickHouse] SHALL support the `` parameter in the `` section of +the `config.xml` that SHALL define the scope of the LDAP search as defined +by the https://ldapwiki.com/wiki/LDAP%20Search%20Scopes. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base +version: 1.0 + +[ClickHouse] SHALL support the `base` value for the the `` parameter in the +`` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/BaseObject. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel +version: 1.0 + +[ClickHouse] SHALL support the `one_level` value for the the `` parameter in the +`` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/SingleLevel. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children +version: 1.0 + +[ClickHouse] SHALL support the `children` value for the the `` parameter in the +`` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/SubordinateSubtree. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree +version: 1.0 + +[ClickHouse] SHALL support the `children` value for the the `` parameter in the +`` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/WholeSubtree. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default +version: 1.0 + +[ClickHouse] SHALL support the `subtree` as the default value for the the `` parameter in the +`` section of the `config.xml` when the `` parameter is not specified. + +#### Search Filter Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter +version: 1.0 + +[ClickHouse] SHALL support the `` parameter in the `` +section of the `config.xml` that SHALL specify the template used to construct +the [LDAP filter](https://ldap.com/ldap-filters/) for the search. + +The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings +of the template with the actual user name, bind `DN`, and base `DN` during each the [LDAP] search. + +#### Prefix Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix +version: 1.0 + +[ClickHouse] SHALL support the `` parameter in the `` +section of the `config.xml` that SHALL be expected to be in front of each string in +the original list of strings returned by the [LDAP] search. +Prefix SHALL be removed from the original strings and resulting strings SHALL be treated as [RBAC] role names. 
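For illustration only, the intended effect of the prefix can be sketched as stripping it from every string returned by the [LDAP] search before the remainder is treated as an [RBAC] role name. This is not the server implementation, and the handling of entries that do not start with the prefix is an assumption made for the example.

```python
# Minimal illustrative sketch of the expected prefix behavior; this is not the
# server implementation. Skipping entries that do not start with the prefix is
# an assumption made for the example.
def map_search_results_to_role_names(search_results, prefix="clickhouse_"):
    """Strip the prefix from LDAP search results; the remainder is treated as an RBAC role name."""
    return [value[len(prefix):] for value in search_results if value.startswith(prefix)]

assert map_search_results_to_role_names(
    ["clickhouse_dba", "clickhouse_readonly"]) == ["dba", "readonly"]
```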
+ +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default +version: 1.0 + +[ClickHouse] SHALL support empty string as the default value of the `` parameter in +the `` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters +version: 1.0 + +[ClickHouse] SHALL support UTF8 characters as the value of the `` parameter in +the `` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters +version: 1.0 + +[ClickHouse] SHALL support XML special characters as the value of the `` parameter in +the `` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters +version: 1.0 + +[ClickHouse] SHALL support regex special characters as the value of the `` parameter in +the `` section of the `config.xml`. + +## References + +* **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/ +* **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +* **ClickHouse:** https://clickhouse.tech +* **GitLab Repository**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +* **Revision History**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +* **Git:** https://git-scm.com/ + +[RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/ +[SRS]: #srs +[Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/ +[QA-SRS009 ClickHouse LDAP External User Directory]: https://gitlab.com/altinity-qa/documents/qa-srs009-clickhouse-ldap-external-user-directory/-/blob/master/QA_SRS009_ClickHouse_LDAP_External_User_Directory.md +[QA-SRS007 ClickHouse Authentication of Users via LDAP]: https://gitlab.com/altinity-qa/documents/qa-srs007-clickhouse-athentication-of-users-via-ldap/-/blob/master/QA_SRS007_ClickHouse_Authentication_of_Users_via_LDAP.md +[LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +[ClickHouse]: https://clickhouse.tech +[GitLab Repository]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +[Revision History]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +[Git]: https://git-scm.com/ +[GitLab]: https://gitlab.com +''') diff --git a/tests/testflows/ldap/role_mapping/tests/common.py b/tests/testflows/ldap/role_mapping/tests/common.py new file mode 100644 index 00000000000..33ad4a46f52 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/tests/common.py @@ -0,0 +1,252 @@ +import os + +from testflows.core import * +from testflows.asserts import error + +from ldap.authentication.tests.common import getuid, create_ldap_servers_config_content, add_config, Config +from ldap.external_user_directory.tests.common import rbac_roles, rbac_users, ldap_users +from ldap.authentication.tests.common import xmltree, xml_indent, xml_append, xml_with_utf8 + +@TestStep(Given) +def create_table(self, name, create_statement, on_cluster=False): + """Create table. 
+ """ + node = current().context.node + try: + with Given(f"I have a {name} table"): + node.query(create_statement.format(name=name)) + yield name + finally: + with Finally("I drop the table"): + if on_cluster: + node.query(f"DROP TABLE IF EXISTS {name} ON CLUSTER {on_cluster}") + else: + node.query(f"DROP TABLE IF EXISTS {name}") + +@TestStep(Given) +def add_ldap_servers_configuration(self, servers, config_d_dir="/etc/clickhouse-server/config.d", + config_file="ldap_servers.xml", timeout=60, restart=False): + """Add LDAP servers configuration to config.xml. + """ + config = create_ldap_servers_config_content(servers, config_d_dir, config_file) + return add_config(config, restart=restart) + +@TestStep(Given) +def add_ldap_groups(self, groups, node=None): + """Add multiple new groups to the LDAP server. + """ + try: + _groups = [] + for group in groups: + with By(f"adding group {group['cn']}"): + _groups.append(add_group_to_ldap(**group, node=node)) + yield _groups + finally: + with Finally(f"I delete groups from LDAP"): + for _group in _groups: + delete_group_from_ldap(_group, node=node) + +@TestStep(Given) +def add_ldap_external_user_directory(self, server, roles=None, role_mappings=None, + config_d_dir="/etc/clickhouse-server/config.d", + config_file=None, timeout=60, restart=True, config=None): + """Add LDAP external user directory. + """ + if config_file is None: + config_file = f"ldap_external_user_directory_with_role_mapping_{getuid()}.xml" + + if config is None: + config = create_ldap_external_user_directory_config_content(server=server, roles=roles, + role_mappings=role_mappings, config_d_dir=config_d_dir, config_file=config_file) + + return add_config(config, restart=restart) + +@TestStep(Given) +def add_rbac_roles(self, roles): + """Add RBAC roles. + """ + with rbac_roles(*roles) as _roles: + yield _roles + +@TestStep(Given) +def add_rbac_users(self, users): + """Add RBAC users. + """ + with rbac_users(*users) as _users: + yield _users + +@TestStep(Given) +def add_ldap_users(self, users, node=None): + """Add LDAP users. + """ + with ldap_users(*users, node=node) as _users: + yield _users + +def add_group_to_ldap(cn, gidnumber=None, node=None, _gidnumber=[600], exitcode=0): + """Add new group entry to LDAP. + """ + _gidnumber[0] += 1 + + if node is None: + node = current().context.ldap_node + + if gidnumber is None: + gidnumber = _gidnumber[0] + + group = { + "dn": f"cn={cn},ou=groups,dc=company,dc=com", + "objectclass": ["top", "groupOfUniqueNames"], + "uniquemember": "cn=admin,dc=company,dc=com", + "_server": node.name + } + + lines = [] + + for key, value in list(group.items()): + if key.startswith("_"): + continue + elif type(value) is list: + for v in value: + lines.append(f"{key}: {v}") + else: + lines.append(f"{key}: {value}") + + ldif = "\n".join(lines) + + r = node.command( + f"echo -e \"{ldif}\" | ldapadd -x -H ldap://localhost -D \"cn=admin,dc=company,dc=com\" -w admin") + + if exitcode is not None: + assert r.exitcode == exitcode, error() + + return group + +def delete_group_from_ldap(group, node=None, exitcode=0): + """Delete group entry from LDAP. + """ + if node is None: + node = current().context.ldap_node + + with By(f"deleting group {group['dn']}"): + r = node.command( + f"ldapdelete -x -H ldap://localhost -D \"cn=admin,dc=company,dc=com\" -w admin \"{group['dn']}\"") + + if exitcode is not None: + assert r.exitcode == exitcode, error() + +def fix_ldap_permissions(node=None, exitcode=0): + """Fix LDAP access permissions. 
+ """ + if node is None: + node = current().context.ldap_node + + ldif = ( + "dn: olcDatabase={1}mdb,cn=config\n" + "changetype: modify\n" + "delete: olcAccess\n" + "-\n" + "add: olcAccess\n" + "olcAccess: to attrs=userPassword,shadowLastChange by self write by dn=\\\"cn=admin,dc=company,dc=com\\\" write by anonymous auth by * none\n" + "olcAccess: to * by self write by dn=\\\"cn=admin,dc=company,dc=com\\\" read by users read by * none" + ) + + r = node.command( + f"echo -e \"{ldif}\" | ldapmodify -Y EXTERNAL -Q -H ldapi:///") + + if exitcode is not None: + assert r.exitcode == exitcode, error() + +def add_user_to_group_in_ldap(user, group, node=None, exitcode=0): + """Add user to a group in LDAP. + """ + if node is None: + node = current().context.ldap_node + + ldif = (f"dn: {group['dn']}\n" + "changetype: modify\n" + "add: uniquemember\n" + f"uniquemember: {user['dn']}") + + with By(f"adding user {user['dn']} to group {group['dn']}"): + r = node.command( + f"echo -e \"{ldif}\" | ldapmodify -x -H ldap://localhost -D \"cn=admin,dc=company,dc=com\" -w admin") + + if exitcode is not None: + assert r.exitcode == exitcode, error() + +def delete_user_from_group_in_ldap(user, group, node=None, exitcode=0): + """Delete user from a group in LDAP. + """ + if node is None: + node = current().context.ldap_node + + ldif = (f"dn: {group['dn']}\n" + "changetype: modify\n" + "delete: uniquemember\n" + f"uniquemember: {user['dn']}") + + with By(f"deleting user {user['dn']} from group {group['dn']}"): + r = node.command( + f"echo -e \"{ldif}\" | ldapmodify -x -H ldap://localhost -D \"cn=admin,dc=company,dc=com\" -w admin") + + if exitcode is not None: + assert r.exitcode == exitcode, error() + +def create_xml_config_content(entries, config_d_dir="/etc/clickhouse-server/config.d", + config_file="ldap_external_user_directories.xml"): + """Create XML configuration file from a dictionary. + """ + uid = getuid() + path = os.path.join(config_d_dir, config_file) + name = config_file + root = xmltree.Element("yandex") + root.append(xmltree.Comment(text=f"config uid: {uid}")) + + def create_xml_tree(entries, root): + for k,v in entries.items(): + if type(v) is dict: + xml_element = xmltree.Element(k) + create_xml_tree(v, xml_element) + root.append(xml_element) + elif type(v) in (list, tuple): + xml_element = xmltree.Element(k) + for e in v: + create_xml_tree(e, xml_element) + root.append(xml_element) + else: + xml_append(root, k, v) + + create_xml_tree(entries, root) + xml_indent(root) + content = xml_with_utf8 + str(xmltree.tostring(root, short_empty_elements=False, encoding="utf-8"), "utf-8") + + return Config(content, path, name, uid, "config.xml") + +def create_ldap_external_user_directory_config_content(server=None, roles=None, role_mappings=None, **kwargs): + """Create LDAP external user directory configuration file content. + """ + entries = { + "user_directories": { + "ldap": { + } + } + } + + entries["user_directories"]["ldap"] = [] + + if server: + entries["user_directories"]["ldap"].append({"server": server}) + + if roles: + entries["user_directories"]["ldap"].append({"roles": [{r: None} for r in roles]}) + + if role_mappings: + for role_mapping in role_mappings: + entries["user_directories"]["ldap"].append({"role_mapping": role_mapping}) + + return create_xml_config_content(entries, **kwargs) + +def create_entries_ldap_external_user_directory_config_content(entries, **kwargs): + """Create LDAP external user directory configuration file content. 
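+
+    The `entries` dictionary is converted to XML as-is. A minimal sketch
+    (server name is illustrative):
+
+        config = create_entries_ldap_external_user_directory_config_content(
+            {"user_directories": [{"ldap": [{"server": "openldap1"}]}]})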
+ """ + return create_xml_config_content(entries, **kwargs) \ No newline at end of file diff --git a/tests/testflows/ldap/role_mapping/tests/mapping.py b/tests/testflows/ldap/role_mapping/tests/mapping.py new file mode 100644 index 00000000000..97188199782 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/tests/mapping.py @@ -0,0 +1,1372 @@ +# -*- coding: utf-8 -*- +from testflows.core import * +from testflows.asserts import error + +from multiprocessing.dummy import Pool + +from ldap.role_mapping.requirements import * +from ldap.role_mapping.tests.common import * +from ldap.external_user_directory.tests.common import join, randomword + +from ldap.external_user_directory.tests.authentications import login_with_valid_username_and_password +from ldap.external_user_directory.tests.authentications import login_with_invalid_username_and_valid_password +from ldap.external_user_directory.tests.authentications import login_with_valid_username_and_invalid_password + +def remove_ldap_groups_in_parallel(groups, i, iterations=10): + """Remove LDAP groups. + """ + with When(f"LDAP groups are removed #{i}"): + for j in range(iterations): + for group in groups: + with When(f"I delete group #{j}", description=f"{group}"): + delete_group_from_ldap(group, exitcode=None) + +def add_ldap_groups_in_parallel(ldap_user, names, i, iterations=10): + """Add LDAP groups. + """ + with When(f"LDAP groups are added #{i}"): + for j in range(iterations): + for name in names: + with When(f"I add group {name} #{j}", description=f"{name}"): + group = add_group_to_ldap(cn=name, exitcode=None) + + with When(f"I add user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=group, exitcode=None) + +def add_user_to_ldap_groups_in_parallel(ldap_user, groups, i, iterations=10): + """Add user to LDAP groups. + """ + with When(f"user is added to LDAP groups #{i}"): + for j in range(iterations): + for group in groups: + with When(f"I add user to the group {group['dn']} #{j}"): + add_user_to_group_in_ldap(user=ldap_user, group=group, exitcode=None) + +def remove_user_from_ldap_groups_in_parallel(ldap_user, groups, i, iterations=10): + """Remove user from LDAP groups. + """ + with When(f"user is removed from LDAP groups #{i}"): + for j in range(iterations): + for group in groups: + with When(f"I remove user from the group {group['dn']} #{j}"): + delete_user_from_group_in_ldap(user=ldap_user, group=group, exitcode=None) + +def add_roles_in_parallel(role_names, i, iterations=10): + """Add roles. + """ + with When(f"roles are added #{i}"): + for j in range(iterations): + for role_name in role_names: + with When(f"I add role {role_name} #{j}"): + current().context.node.query(f"CREATE ROLE OR REPLACE {role_name}") + +def remove_roles_in_parallel(role_names, i, iterations=10): + """Remove roles. + """ + with When(f"roles are removed #{i}"): + for j in range(iterations): + for role_name in role_names: + with When(f"I remove role {role_name} #{j}"): + current().context.node.query(f"DROP ROLE IF EXISTS {role_name}") + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Map_MultipleRoles("1.0") +) +def multiple_roles(self, ldap_server, ldap_user): + """Check that users authenticated using LDAP external user directory + can be assigned multiple LDAP mapped roles. 
+ """ + uid = getuid() + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix":"" + } + ] + + with Given("I add LDAP groups"): + groups = add_ldap_groups(groups=({"cn": f"role0_{uid}"}, {"cn": f"role1_{uid}"})) + + with And("I add LDAP user to each LDAP group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + add_user_to_group_in_ldap(user=ldap_user, group=groups[1]) + + with And("I add RBAC roles"): + roles = add_rbac_roles(roles=(f"role0_{uid}", f"role1_{uid}")) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped LDAP roles"): + with By(f"checking that first role is assigned", description=f"{roles[0]}"): + assert roles[0] in r.output, error() + with And(f"checking that second role is also assigned", description=f"{roles[1]}"): + assert roles[1] in r.output, error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_WithFixedRoles("1.0") +) +def with_fixed_roles(self, ldap_server, ldap_user): + """Check that LDAP users can be assigned roles dynamically + and statically using the `` section. + """ + uid = getuid() + role_name = f"role_{uid}" + fixed_role_name = f"role_fixed_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add matching RBAC role"): + mapped_roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I add an RBAC role that will be added statically"): + roles = add_rbac_roles(roles=(f"{fixed_role_name}",)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, roles=roles, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped and fixed roles"): + with By("checking that mapped role is assigned"): + assert mapped_roles[0].strip("'") in r.output, error() + with And("checking that fixed role is assigned"): + assert roles[0] in r.output, error() + +@TestOutline +def map_role(self, role_name, ldap_server, ldap_user, rbac_role_name=None, role_mappings=None): + """Check that we can map a role with a given name. 
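+
+    A usage sketch (role name is illustrative); when `rbac_role_name` is not
+    specified it defaults to `role_name`:
+
+        map_role(role_name="role0", ldap_server=ldap_server, ldap_user=ldap_user)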
+ """ + if role_mappings is None: + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + if rbac_role_name is None: + rbac_role_name = role_name + + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add matching RBAC role"): + roles = add_rbac_roles(roles=(f"'{rbac_role_name}'",)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped LDAP role"): + with By(f"checking that the role is assigned", description=f"{role_name}"): + assert roles[0].strip("'") in r.output, error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithUTF8Characters("1.0") +) +def role_name_with_utf8_characters(self, ldap_server, ldap_user): + """Check that we can map a role that contains UTF8 characters. + """ + uid = getuid() + role_name = f"role_{uid}_Gãńdåłf_Thê_Gręât" + + map_role(role_name=role_name, ldap_server=ldap_server, ldap_user=ldap_user) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_Long("1.0") +) +def role_name_with_more_than_128_characters(self, ldap_server, ldap_user): + """Check that we can map a role that contains more than 128 characters. + """ + uid = getuid() + role_name = f"role_{uid}_{'r'*128}" + + map_role(role_name=role_name, ldap_server=ldap_server, ldap_user=ldap_user) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithSpecialXMLCharacters("1.0") +) +def role_name_with_special_xml_characters(self, ldap_server, ldap_user): + """Check that we can map a role that contains special XML + characters that must be escaped. + """ + uid = getuid() + role_name = f"role_{uid}_\\<\\>" + rbac_role_name = f"role_{uid}_<>" + + map_role(role_name=role_name, ldap_server=ldap_server, ldap_user=ldap_user, rbac_role_name=rbac_role_name) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Map_Role_Name_WithSpecialRegexCharacters("1.0") +) +def role_name_with_special_regex_characters(self, ldap_server, ldap_user): + """Check that we can map a role that contains special regex + characters that must be escaped. + """ + uid = getuid() + role_name = f"role_{uid}_\\+.?$" + rbac_role_name = f"role_{uid}_+.?$" + + map_role(role_name=role_name, ldap_server=ldap_server, ldap_user=ldap_user, rbac_role_name=rbac_role_name) + +@TestOutline +def map_groups_with_prefixes(self, prefixes, group_names, role_names, + expected, not_expected, ldap_server, ldap_user): + """Check that we can map multiple groups to roles whith one or more prefixes. 
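+
+    For example (illustrative): prefixes=["clickhouse_"] maps the LDAP group
+    "clickhouse_role0" to the RBAC role "role0", while groups without the
+    prefix are left unmapped.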
+ """ + role_mappings = [] + + for prefix in prefixes: + role_mappings.append({ + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": prefix + }) + + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": name} for name in group_names)) + + with And("I add LDAP user to the group"): + for group in groups: + add_user_to_group_in_ldap(user=ldap_user, group=group) + + with And("I add RBAC roles"): + roles = add_rbac_roles(roles=(f"'{name}'" for name in role_names)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped roles"): + with By(f"checking that the roles are assigned", description=f"{', '.join(expected)}"): + for name in expected: + assert name in r.output, error() + + with And("I expect the user not to have mapped roles"): + with By(f"checking that the roles are not assigned", description=f"{', '.join(not_expected)}"): + for name in not_expected: + assert name not in r.output, error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Syntax("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix("1.0") +) +def prefix_non_empty(self, ldap_server, ldap_user): + """Check that only group names with specified prefix are mapped to roles + when prefix is not empty. + """ + uid = getuid() + + with Given("I define group names"): + group_names=[ + f"clickhouse_role_{uid}", + f"role0_{uid}" + ] + + with And("I define role names"): + role_names=[ + f"role_{uid}", + f"role0_{uid}" + ] + + with And("I define group prefixes to be mapped"): + prefixes = ["clickhouse_"] + + with And("I define the expected mapped and not mapped roles"): + expected=[f"role_{uid}"] + not_expected=[f"role0_{uid}"] + + map_groups_with_prefixes(ldap_server=ldap_server, ldap_user=ldap_user, + prefixes=prefixes, group_names=group_names, role_names=role_names, + expected=expected, not_expected=not_expected) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_Default("1.0") +) +def prefix_default_value(self, ldap_server, ldap_user): + """Check that when prefix is not specified the default value of prefix + is empty and therefore ldap groups are mapped directly to roles. + """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + } + ] + + map_role(role_name=role_name, ldap_server=ldap_server, ldap_user=ldap_user, role_mappings=role_mappings) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithUTF8Characters("1.0") +) +def prefix_with_utf8_characters(self, ldap_server, ldap_user): + """Check that we can map a role when prefix contains UTF8 characters. 
+ """ + uid = getuid() + + with Given("I define group names"): + group_names=[ + f"Gãńdåłf_Thê_Gręât_role_{uid}", + f"role0_{uid}" + ] + + with And("I define role names"): + role_names=[ + f"role_{uid}", + f"role0_{uid}" + ] + + with And("I define group prefixes to be mapped"): + prefixes = ["Gãńdåłf_Thê_Gręât_"] + + with And("I define the expected mapped and not mapped roles"): + expected=[f"role_{uid}"] + not_expected=[f"role0_{uid}"] + + map_groups_with_prefixes(ldap_server=ldap_server, ldap_user=ldap_user, + prefixes=prefixes, group_names=group_names, role_names=role_names, + expected=expected, not_expected=not_expected) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SpecialCharactersEscaping("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialXMLCharacters("1.0") +) +def prefix_with_special_xml_characters(self, ldap_server, ldap_user): + """Check that we can map a role when prefix contains special XML characters. + """ + uid = getuid() + + with Given("I define group names"): + group_names=[ + f"clickhouse\\<\\>_role_{uid}", + f"role0_{uid}" + ] + + with And("I define role names"): + role_names=[ + f"role_{uid}", + f"role0_{uid}" + ] + + with And("I define group prefixes to be mapped"): + prefixes = ["clickhouse<>_"] + + with And("I define the expected mapped and not mapped roles"): + expected=[f"role_{uid}"] + not_expected=[f"role0_{uid}"] + + map_groups_with_prefixes(ldap_server=ldap_server, ldap_user=ldap_user, + prefixes=prefixes, group_names=group_names, role_names=role_names, + expected=expected, not_expected=not_expected) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialRegexCharacters("1.0") +) +def prefix_with_special_regex_characters(self, ldap_server, ldap_user): + """Check that we can map a role when prefix contains special regex characters. + """ + uid = getuid() + + with Given("I define group names"): + group_names=[ + f"clickhouse\\+.?\\$_role_{uid}", + f"role0_{uid}" + ] + + with And("I define role names"): + role_names=[ + f"role_{uid}", + f"role0_{uid}" + ] + + with And("I define group prefixes to be mapped"): + prefixes = ["clickhouse+.?\\$_"] + + with And("I define the expected mapped and not mapped roles"): + expected=[f"role_{uid}"] + not_expected=[f"role0_{uid}"] + + map_groups_with_prefixes(ldap_server=ldap_server, ldap_user=ldap_user, + prefixes=prefixes, group_names=group_names, role_names=role_names, + expected=expected, not_expected=not_expected) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections("1.0") +) +def multiple_sections_with_different_prefixes(self, ldap_server, ldap_user): + """Check that we can map multiple roles with multiple role mapping sections + that use different prefixes. 
+ """ + uid = getuid() + + with Given("I define group names"): + group_names=[ + f"clickhouse0_role0_{uid}", + f"clickhouse1_role1_{uid}", + f"role2_{uid}" + ] + + with And("I define role names"): + role_names=[ + f"role0_{uid}", + f"role1_{uid}", + f"role2_{uid}" + ] + + with And("I define group prefixes to be mapped"): + prefixes = ["clickhouse0_", "clickhouse1_"] + + with And("I define the expected mapped and not mapped roles"): + expected=[f"role0_{uid}", f"role1_{uid}"] + not_expected=[f"role2_{uid}"] + + map_groups_with_prefixes(ldap_server=ldap_server, ldap_user=ldap_user, + prefixes=prefixes, group_names=group_names, role_names=role_names, + expected=expected, not_expected=not_expected) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_Removed("1.0") +) +def group_removed(self, ldap_server, ldap_user): + """Check that roles are not mapped after the corresponding LDAP group + is removed. + """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + try: + with Given("I add LDAP group"): + group = add_group_to_ldap(**{"cn": role_name}) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=group) + + with And("I add matching RBAC role"): + roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped LDAP role"): + with By(f"checking that the role is assigned", description=f"{role_name}"): + assert role_name in r.output, error() + finally: + with Finally("I remove LDAP group"): + delete_group_from_ldap(group) + + with When(f"I login as an LDAP user after LDAP group is removed"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user not to have mapped LDAP role"): + with By(f"checking that the role is not assigned", description=f"{role_name}"): + assert role_name not in r.output, error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_UserRemoved("1.0") +) +def user_removed_from_group(self, ldap_server, ldap_user): + """Check that roles are not mapped after the user has been removed + from the corresponding LDAP group. 
+ """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add matching RBAC role"): + roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped LDAP role"): + with By(f"checking that the role is assigned", description=f"{role_name}"): + assert role_name in r.output, error() + + with When("I remove user from the LDAP group"): + delete_user_from_group_in_ldap(user=ldap_user, group=groups[0]) + + with And(f"I login as an LDAP user after user has been removed from the group"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user not to have mapped LDAP role"): + with By(f"checking that the role is not assigned", description=f"{role_name}"): + assert role_name not in r.output, error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_NotPresent("1.0") +) +def role_not_present(self, ldap_server, ldap_user): + """Check that LDAP users can still be authenticated even if + the mapped role is not present. + """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group for which no matching roles are present"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])], no_checks=True) + + with Then("I expect the login to succeed"): + assert r.exitcode == 0, error() + + with And("the user not to have any mapped LDAP role"): + assert r.output == "", error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Removed("1.0"), + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Readded("1.0") +) +def role_removed_and_readded(self, ldap_server, ldap_user): + """Check that when a mapped role is removed the privileges provided by the role + are revoked from all the authenticated LDAP users and when the role + is added back the privileges to the authenticated LDAP users are re-granted. 
+ """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add matching RBAC role"): + roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I create a table for which the role will provide privilege"): + table_name = create_table(name=f"table_{uid}", + create_statement="CREATE TABLE {name} (d DATE, s String, i UInt8) ENGINE = Memory()") + + with And("I grant select privilege on the table to the role"): + self.context.node.query(f"GRANT SELECT ON {table_name} TO {role_name}") + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as LDAP user using clickhouse-client"): + with self.context.cluster.shell(node=self.context.node.name) as shell: + with shell( + f"TERM=dumb clickhouse client --user {ldap_user['username']} --password {ldap_user['password']}", + asynchronous=True, name="client") as client: + client.app.expect("clickhouse1 :\) ") + + with When("I execute SHOW GRANTS"): + client.app.send(f"SHOW GRANTS") + + with Then("I expect the user to have the mapped role"): + client.app.expect(f"{role_name}") + client.app.expect("clickhouse1 :\) ") + + with When("I execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get no errors"): + client.app.expect("Ok\.") + client.app.expect("clickhouse1 :\) ") + + with When("I remove the role that grants the privilege"): + self.context.node.query(f"DROP ROLE {role_name}") + + with And("I re-execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get not enough privileges error"): + client.app.expect(f"DB::Exception: {ldap_user['username']}: Not enough privileges.") + client.app.expect("clickhouse1 :\) ") + + with When("I add the role that grant the privilege back"): + self.context.node.query(f"CREATE ROLE {role_name}") + self.context.node.query(f"GRANT SELECT ON {table_name} TO {role_name}") + + with And("I execute select on the table after role is added back"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get no errors"): + client.app.expect("Ok\.") + client.app.expect("clickhouse1 :\) ") + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_NewPrivilege("1.0"), + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_RemovedPrivilege("1.0") +) +def privilege_new_and_removed(self, ldap_server, ldap_user): + """Check that when a new privilege is added to the mapped role + it is granted to all authenticated LDAP users and when + the privilege is removed from the role it is also revoked + from all authenticated LDAP users. 
+ """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add matching RBAC role"): + roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I create a table for which the role will provide privilege"): + table_name = create_table(name=f"table_{uid}", + create_statement="CREATE TABLE {name} (d DATE, s String, i UInt8) ENGINE = Memory()") + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as LDAP user using clickhouse-client"): + with self.context.cluster.shell(node=self.context.node.name) as shell: + with shell( + f"TERM=dumb clickhouse client --user {ldap_user['username']} --password {ldap_user['password']}", + asynchronous=True, name="client") as client: + client.app.expect("clickhouse1 :\) ") + + with When("I execute SHOW GRANTS"): + client.app.send(f"SHOW GRANTS") + + with Then("I expect the user to have the mapped role"): + client.app.expect(f"{role_name}") + client.app.expect("clickhouse1 :\) ") + + with And("I execute select on the table when the mapped role does not provide this privilege"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get not enough privileges error"): + client.app.expect(f"DB::Exception: {ldap_user['username']}: Not enough privileges.") + client.app.expect("clickhouse1 :\) ") + + with When("I grant select privilege on the table to the mapped role"): + self.context.node.query(f"GRANT SELECT ON {table_name} TO {role_name}") + + with And("I execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get no errors"): + client.app.expect("Ok\.") + client.app.expect("clickhouse1 :\) ") + + with When("I remove the privilege from the mapped role"): + self.context.node.query(f"REVOKE SELECT ON {table_name} FROM {role_name}") + + with And("I re-execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get not enough privileges error"): + client.app.expect(f"DB::Exception: {ldap_user['username']}: Not enough privileges.") + client.app.expect("clickhouse1 :\) ") + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_Added("1.0") +) +def role_added(self, ldap_server, ldap_user): + """Check that when the mapped role is not present during LDAP user authentication but + is later added then the authenticated LDAP users is granted the privileges provided + by the mapped role. 
+ """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I create a table for which the role will provide privilege"): + table_name = create_table(name=f"table_{uid}", + create_statement="CREATE TABLE {name} (d DATE, s String, i UInt8) ENGINE = Memory()") + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as LDAP user using clickhouse-client"): + with self.context.cluster.shell(node=self.context.node.name) as shell: + with shell( + f"TERM=dumb clickhouse client --user {ldap_user['username']} --password {ldap_user['password']}", + asynchronous=True, name="client") as client: + client.app.expect("clickhouse1 :\) ") + + with When("I execute SHOW GRANTS"): + client.app.send(f"SHOW GRANTS") + + with Then("I expect the user not to have any mapped role"): + client.app.expect(f"Ok\.") + client.app.expect("clickhouse1 :\) ") + + with And("I execute select on the table"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get not enough privileges error"): + client.app.expect(f"DB::Exception: {ldap_user['username']}: Not enough privileges.") + client.app.expect("clickhouse1 :\) ") + + with When("I add the role that grant the privilege"): + self.context.node.query(f"CREATE ROLE {role_name}") + self.context.node.query(f"GRANT SELECT ON {table_name} TO {role_name}") + + with And("I execute select on the table after role is added"): + client.app.send(f"SELECT * FROM {table_name} LIMIT 1") + + with Then("I expect to get no errors"): + client.app.expect("Ok\.") + client.app.expect("clickhouse1 :\) ") + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_New("1.0") +) +def role_new(self, ldap_server, ldap_user): + """Check that no new roles can be granted to LDAP authenticated users. 
+ """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + message = f"DB::Exception: Cannot update user `{ldap_user['username']}` in ldap because this storage is readonly" + exitcode = 239 + + with Given("I a have RBAC role that is not mapped"): + roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as LDAP user using clickhouse-client"): + with self.context.cluster.shell(node=self.context.node.name) as shell: + with shell( + f"TERM=dumb clickhouse client --user {ldap_user['username']} --password {ldap_user['password']}", + asynchronous=True, name="client") as client: + client.app.expect("clickhouse1 :\) ") + + with When("I try to grant new role to user"): + self.context.node.query(f"GRANT {role_name} TO {ldap_user['username']}", + message=message, exitcode=exitcode) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections_IdenticalParameters("1.0") +) +def multiple_sections_with_identical_parameters(self, ldap_server, ldap_user): + """Check behaviour when multiple role mapping sections + have exactly the same parameters. + """ + uid = getuid() + role_name = f"role_{uid}" + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] * 4 + + with Given("I add LDAP group"): + groups = add_ldap_groups(groups=({"cn": role_name},)) + + with And("I add LDAP user to the group"): + add_user_to_group_in_ldap(user=ldap_user, group=groups[0]) + + with And("I add matching RBAC role"): + roles = add_rbac_roles(roles=(f"{role_name}",)) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + with When(f"I login as an LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", ldap_user["username"]), ("password", ldap_user["password"])]) + + with Then("I expect the user to have mapped LDAP role"): + with By(f"checking that the role is assigned", description=f"{role_name}"): + assert roles[0].strip("'") in r.output, error() + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_RemovedAndAdded_Parallel("1.0") +) +def group_removed_and_added_in_parallel(self, ldap_server, ldap_user, count=20, timeout=200): + """Check that user can be authenticated successfully when LDAP groups + are removed and added in parallel. 
+ """ + uid = getuid() + role_names = [f"role{i}_{uid}" for i in range(count)] + users = [{"cn": ldap_user["username"], "userpassword": ldap_user["password"]}] + groups = [] + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + try: + with Given("I initially add all LDAP groups"): + for role_name in role_names: + with When(f"I add LDAP groop {role_name}"): + group = add_group_to_ldap(**{"cn": role_name}) + with And(f"I add LDAP user to the group {role_name}"): + add_user_to_group_in_ldap(user=ldap_user, group=group) + groups.append(group) + + with And("I add RBAC roles"): + add_rbac_roles(roles=role_names) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + tasks = [] + try: + with When("user try to login while LDAP groups are added and removed in parallel"): + p = Pool(15) + for i in range(15): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(remove_ldap_groups_in_parallel, (groups, i, 10,))) + tasks.append(p.apply_async(add_ldap_groups_in_parallel,(ldap_user, role_names, i, 10,))) + + finally: + with Finally("it should work", flags=TE): + join(tasks, timeout) + finally: + with Finally("I clean up all LDAP groups"): + for group in groups: + delete_group_from_ldap(group, exitcode=None) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_LDAP_Group_UserRemovedAndAdded_Parallel("1.0") +) +def user_removed_and_added_in_ldap_groups_in_parallel(self, ldap_server, ldap_user, count=20, timeout=200): + """Check that user can be authenticated successfully when it is + removed and added from mapping LDAP groups in parallel. 
+ """ + uid = getuid() + role_names = [f"role{i}_{uid}" for i in range(count)] + users = [{"cn": ldap_user["username"], "userpassword": ldap_user["password"]}] + groups = [{"cn": role_name} for role_name in role_names] + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + with Given("I add all LDAP groups"): + groups = add_ldap_groups(groups=groups) + + for group in groups: + with And(f"I add LDAP user to the group {group['dn']}"): + add_user_to_group_in_ldap(user=ldap_user, group=group) + + with And("I add RBAC roles"): + add_rbac_roles(roles=role_names) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + tasks = [] + try: + with When("user try to login while user is added and removed from LDAP groups in parallel"): + p = Pool(15) + for i in range(15): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(remove_user_from_ldap_groups_in_parallel, (ldap_user, groups, i, 1,))) + tasks.append(p.apply_async(add_user_to_ldap_groups_in_parallel, (ldap_user, groups, i, 1,))) + + finally: + with Finally("it should work", flags=TE): + join(tasks, timeout) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_RBAC_Role_RemovedAndAdded_Parallel("1.0") +) +def roles_removed_and_added_in_parallel(self, ldap_server, ldap_user, count=20, timeout=200): + """Check that user can be authenticated successfully when roles that are mapped + by the LDAP groups are removed and added in parallel. + """ + uid = getuid() + role_names = [f"role{i}_{uid}" for i in range(count)] + users = [{"cn": ldap_user["username"], "userpassword": ldap_user["password"]}] + groups = [{"cn": role_name} for role_name in role_names] + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "" + } + ] + + fail("known bug that needs to be investigated") + + with Given("I add all LDAP groups"): + groups = add_ldap_groups(groups=groups) + for group in groups: + with And(f"I add LDAP user to the group {group['dn']}"): + add_user_to_group_in_ldap(user=ldap_user, group=group) + + with And("I add RBAC roles"): + add_rbac_roles(roles=role_names) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + tasks = [] + try: + with When("user try to login while mapped roles are added and removed in parallel"): + p = Pool(15) + for i in range(15): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(remove_roles_in_parallel, (role_names, i, 10,))) + tasks.append(p.apply_async(add_roles_in_parallel, (role_names, i, 10,))) + + finally: + with Finally("it should work", flags=TE): + join(tasks, timeout) + + with And("I clean up all the roles"): + for role_name in role_names: + with By(f"dropping role {role_name}", flags=TE): + self.context.node.query(f"DROP ROLE IF EXISTS {role_name}") + +@TestOutline +def parallel_login(self, ldap_server, ldap_user, user_count=10, timeout=200, role_count=10): + """Check that login of valid and invalid LDAP authenticated users + with mapped roles works in parallel. 
+ """ + uid = getuid() + + role_names = [f"role{i}_{uid}" for i in range(role_count)] + users = [{"cn": f"parallel_user{i}", "userpassword": randomword(20)} for i in range(user_count)] + groups = [{"cn": f"clickhouse_{role_name}"} for role_name in role_names] + + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "clickhouse_" + } + ] + + with Given("I add LDAP users"): + users = add_ldap_users(users=users) + + with And("I add all LDAP groups"): + groups = add_ldap_groups(groups=groups) + + for group in groups: + for user in users: + with And(f"I add LDAP user {user['dn']} to the group {group['dn']}"): + add_user_to_group_in_ldap(user=user, group=group) + + with And("I add RBAC roles"): + add_rbac_roles(roles=role_names) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=role_mappings, restart=True) + + tasks = [] + try: + with When("users try to login in parallel", description=""" + * with valid username and password + * with invalid username and valid password + * with valid username and invalid password + """): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) + + finally: + with Then("it should work"): + join(tasks, timeout) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_ValidAndInvalid("1.0") +) +def parallel_login_of_multiple_users(self, ldap_server, ldap_user, timeout=200, role_count=10): + """Check that valid and invalid logins of multiple LDAP authenticated users + with mapped roles works in parallel. + """ + parallel_login(user_count=10, ldap_user=ldap_user,ldap_server=ldap_server, + timeout=timeout, role_count=role_count) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_SameUser("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_ValidAndInvalid("1.0") +) +def parallel_login_of_the_same_user(self, ldap_server, ldap_user, timeout=200, role_count=10): + """Check that valid and invalid logins of the same LDAP authenticated user + with mapped roles works in parallel. + """ + parallel_login(user_count=10, ldap_user=ldap_user,ldap_server=ldap_server, + timeout=timeout, role_count=role_count) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_MultipleServers("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_ValidAndInvalid("1.0") +) +def parallel_login_of_ldap_users_with_multiple_servers(self, ldap_server, ldap_user, timeout=200): + """Check that valid and invalid logins of multiple LDAP users that have mapped roles + works in parallel using multiple LDAP external user directories. 
+ """ + parallel_login_with_multiple_servers(ldap_server=ldap_server, ldap_user=ldap_user, + user_count=10, role_count=10,timeout=timeout, with_ldap_users=True, with_local_users=False) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_LocalAndMultipleLDAP("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_ValidAndInvalid("1.0") +) +def parallel_login_of_local_and_ldap_users_with_multiple_servers(self, ldap_server, ldap_user, timeout=200): + """Check that valid and invalid logins of local users and LDAP users that have mapped roles + works in parallel using multiple LDAP external user directories. + """ + parallel_login_with_multiple_servers(ldap_server=ldap_server, ldap_user=ldap_user, + user_count=10, role_count=10, timeout=timeout, with_local_users=True, with_ldap_users=True) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_LocalOnly("1.0") +) +def parallel_login_of_local_users(self, ldap_server, ldap_user, timeout=200): + """Check that valid and invalid logins of local users + works in parallel when multiple LDAP external user directories + with role mapping are configured. + """ + parallel_login_with_multiple_servers(ldap_server=ldap_server, ldap_user=ldap_user, + user_count=10, role_count=10, timeout=timeout, with_local_users=True, with_ldap_users=False) + +@TestOutline +def parallel_login_with_multiple_servers(self, ldap_server, ldap_user, user_count=10, + role_count=10, timeout=200, with_ldap_users=True, with_local_users=False): + """Check that login of valid and invalid local users or LDAP users that have mapped roles + works in parallel using multiple LDAP external user directories. + """ + uid = getuid() + + cluster = self.context.cluster + user_groups = {} + + with Given("I define role names"): + role_names = [f"role{i}_{uid}" for i in range(role_count)] + + with And("I define corresponding group names"): + groups = [{"cn": f"clickhouse_{role_name}"} for role_name in role_names] + + if with_ldap_users: + with And("I define a group of users to be created on each LDAP server"): + user_groups["openldap1_users"] = [ + {"cn": f"openldap1_parallel_user{i}_{uid}", "userpassword": randomword(20)} for i in range(user_count) + ] + user_groups["openldap2_users"] = [ + {"cn": f"openldap2_parallel_user{i}_{uid}", "userpassword": randomword(20)} for i in range(user_count) + ] + + if with_local_users: + with And("I define a group of local users to be created"): + user_groups["local_users"] = [ + {"cn": f"local_parallel_user{i}_{uid}", "userpassword": randomword(20)} for i in range(user_count) + ] + + with And("I have a list of checks that I want to run for each user group"): + checks = [ + login_with_valid_username_and_password, + login_with_valid_username_and_invalid_password, + login_with_invalid_username_and_valid_password + ] + + with And("I create config file to define LDAP external user directory for each LDAP server"): + entries = { + "user_directories": [ + {"ldap": [ + {"server": "openldap1"}, + {"role_mappings" : [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "clickhouse_" + } + ]} + ]}, + {"ldap": [ + {"server": "openldap2"}, + {"role_mappings": [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": "(&(objectClass=groupOfUniqueNames)(uniquemember={bind_dn}))", + "prefix": "clickhouse_" + } + ]} + ]} + ] + } + config = 
create_entries_ldap_external_user_directory_config_content(entries) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=None, restart=True, config=config) + + if with_ldap_users: + with And("I add LDAP users to each LDAP server"): + openldap1_users = add_ldap_users(users=user_groups["openldap1_users"], node=cluster.node("openldap1")) + openldap2_users = add_ldap_users(users=user_groups["openldap2_users"], node=cluster.node("openldap2")) + + with And("I add all LDAP groups to each LDAP server"): + openldap1_groups = add_ldap_groups(groups=groups, node=cluster.node("openldap1")) + openldap2_groups = add_ldap_groups(groups=groups, node=cluster.node("openldap2")) + + with And("I add all users to LDAP groups on the first LDAP server"): + for group in openldap1_groups: + for user in openldap1_users: + with By(f"adding LDAP user {user['dn']} to the group {group['dn']}"): + add_user_to_group_in_ldap(user=user, group=group, node=cluster.node("openldap1")) + + with And("I add all users to LDAP groups on the second LDAP server"): + for group in openldap2_groups: + for user in openldap2_users: + with By(f"adding LDAP user {user['dn']} to the group {group['dn']}"): + add_user_to_group_in_ldap(user=user, group=group, node=cluster.node("openldap2")) + + with And("I add RBAC roles"): + add_rbac_roles(roles=role_names) + + if with_local_users: + with And("I add local users"): + add_rbac_users(users=user_groups["local_users"]) + + with And("I grant the same RBAC roles to local users"): + for user in user_groups["local_users"]: + for role_name in role_names: + self.context.node.query(f"GRANT {role_name} TO {user['cn']}") + + tasks = [] + + try: + with When("users in each group try to login in parallel", description=""" + * with valid username and password + * with invalid username and valid password + * with valid username and invalid password + """): + p = Pool(15) + for i in range(25): + for users in user_groups.values(): + for check in checks: + tasks.append(p.apply_async(check, (users, i, 50,))) + + finally: + with Then("it should work"): + join(tasks, timeout) + +@TestFeature +@Name("mapping") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Search("1.0") +) +def feature(self): + """Check role LDAP role mapping. 
+ """ + self.context.node = self.context.cluster.node("clickhouse1") + self.context.ldap_node = self.context.cluster.node("openldap1") + + servers = { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com" + }, + "openldap2": { + "host": "openldap2", + "port": "636", + "enable_tls": "yes", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "tls_require_cert": "never", + } + } + + users = [ + {"server": "openldap1", "username": "user1", "password": "user1", "login": True, + "dn": "cn=user1,ou=users,dc=company,dc=com"}, + ] + + with Given("I fix LDAP access permissions"): + fix_ldap_permissions() + + with And("I add LDAP servers configuration", description=f"{servers}"): + add_ldap_servers_configuration(servers=servers) + + for scenario in loads(current_module(), Scenario): + scenario(ldap_server="openldap1", ldap_user=users[0]) diff --git a/tests/testflows/ldap/role_mapping/tests/server_config.py b/tests/testflows/ldap/role_mapping/tests/server_config.py new file mode 100644 index 00000000000..85fe33f4388 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/tests/server_config.py @@ -0,0 +1,78 @@ +from testflows.core import * +from testflows.asserts import error + +from ldap.role_mapping.requirements import * + +from ldap.authentication.tests.common import invalid_server_config +from ldap.external_user_directory.tests.common import login + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN("1.0") +) +def valid_bind_dn(self): + """Check that LDAP users can login when `bind_dn` is valid. + """ + servers = { + "openldap1": { + "host": "openldap1", "port": "389", "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com" + } + } + + user = { + "server": "openldap1", "username": "user1", "password": "user1", "login": True, + } + + login(servers, "openldap1", user) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN("1.0") +) +def invalid_bind_dn(self): + """Check that LDAP users can't login when `bind_dn` is invalid. + """ + servers = { + "openldap1": { + "host": "openldap1", "port": "389", "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company2,dc=com" + }} + + user = { + "server": "openldap1", "username": "user1", "password": "user1", "login": True, + "exitcode": 4, + "message": "DB::Exception: user1: Authentication failed: password is incorrect or there is no user with such name." + } + + login(servers, "openldap1", user) + +@TestScenario +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN_ConflictWith_AuthDN("1.0") +) +def bind_dn_conflict_with_auth_dn(self, timeout=60): + """Check that an error is returned with both `bind_dn` and + `auth_dn_prefix` and `auth_dn_suffix` are specified at the same time. + """ + message = "DB::Exception: Deprecated 'auth_dn_prefix' and 'auth_dn_suffix' entries cannot be used with 'bind_dn' entry" + servers = { + "openldap1": { + "host": "openldap1", "port": "389", "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "auth_dn_prefix": "cn=", + "auth_dn_suffix": ",ou=users,dc=company,dc=com" + } + } + + invalid_server_config(servers, message=message, tail=18, timeout=timeout) + + +@TestFeature +@Name("server config") +def feature(self, node="clickhouse1"): + """Check LDAP server configuration. 
+ """ + self.context.node = self.context.cluster.node(node) + for scenario in loads(current_module(), Scenario): + scenario() \ No newline at end of file diff --git a/tests/testflows/rbac/docker-compose/clickhouse-service.yml b/tests/testflows/rbac/docker-compose/clickhouse-service.yml index 2d79443dcbb..d5f981ca8b7 100755 --- a/tests/testflows/rbac/docker-compose/clickhouse-service.yml +++ b/tests/testflows/rbac/docker-compose/clickhouse-service.yml @@ -20,7 +20,7 @@ services: test: clickhouse client --query='select 1' interval: 10s timeout: 10s - retries: 3 + retries: 10 start_period: 300s cap_add: - SYS_PTRACE diff --git a/tests/testflows/regression.py b/tests/testflows/regression.py index 6e19e4e49e1..0e9a821cae0 100755 --- a/tests/testflows/regression.py +++ b/tests/testflows/regression.py @@ -14,11 +14,10 @@ def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): """ args = {"local": local, "clickhouse_binary_path": clickhouse_binary_path, "stress": stress, "parallel": parallel} -# Feature(test=load("example.regression", "regression"))(**args) -# Feature(test=load("ldap.regression", "regression"))(**args) -# for i in range(10): -# Feature(test=load("rbac.regression", "regression"))(**args) -# Feature(test=load("aes_encryption.regression", "regression"))(**args) + Feature(test=load("example.regression", "regression"))(**args) + Feature(test=load("ldap.regression", "regression"))(**args) + Feature(test=load("rbac.regression", "regression"))(**args) + Feature(test=load("aes_encryption.regression", "regression"))(**args) if main(): regression() From 6e1a1186424b32363b6b4a452198ca96090f3c49 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 11:14:37 +0300 Subject: [PATCH 088/697] Merge Filter and Expression steps. 
--- src/Processors/QueryPlan/QueryPlan.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 6b5f5bc30b6..9be9f6d0c0b 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -504,6 +504,7 @@ static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * /// TODO: FilterStep auto * parent_expr = typeid_cast(parent.get()); + auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); if (parent_expr && child_expr) @@ -526,6 +527,24 @@ static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * parent_node->children.swap(child_node->children); return true; } + else if (parent_filter && child_expr) + { + const auto & child_actions = child_expr->getExpression(); + const auto & parent_actions = parent_filter->getExpression(); + + if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + return false; + + auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + + auto filter = std::make_unique(child_expr->getInputStreams().front(), merged, + parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn()); + filter->setStepDescription(parent_filter->getStepDescription() + " + " + child_expr->getStepDescription()); + + parent_node->step = std::move(filter); + parent_node->children.swap(child_node->children); + return true; + } return false; } From 6db51965cd8826bfb6cf4771af727c0bedc1a57f Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Tue, 19 Jan 2021 12:03:48 +0300 Subject: [PATCH 089/697] Add S3 disk documentation [EN] --- .../mergetree-family/mergetree.md | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 80769fe9954..084d05ec0a0 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -657,6 +657,96 @@ The `default` storage policy implies using only one volume, which consists of on The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting. +## Using S3 for Data Storage {#table_engine-mergetree-s3} + +`MergeTree` family table engines is able to store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`. + +Configuration markup: +``` xml + + ... + + + s3 + https://storage.yandexcloud.net/my-bucket/root-path/ + your_access_key_id + your_secret_access_key + + http://proxy1 + http://proxy2 + + 10000 + 5000 + 100 + 10 + 1000 + /var/lib/clickhouse/disks/s3/ + true + /var/lib/clickhouse/disks/s3/cache/ + false + + + ... + +``` + +Required parameters: +- `endpoint` — S3 endpoint url in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint url should contain bucket and root path to store data. +- `access_key_id` — S3 access key id. +- `secret_access_key` — S3 secret access key. + +Optional parameters: +- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. +- `proxy` — Proxy configuration for S3 endpoint. 
Each `uri` element inside the `proxy` block should contain a proxy URL.
+- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
+- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
+- `max_connections` — S3 connections pool size. Default value is `100`.
+- `retry_attempts` — Number of retry attempts in case of a failed request. Default value is `10`.
+- `min_bytes_for_seek` — Minimal number of bytes to use a seek operation instead of sequential read. Default value is `1 Mb`.
+- `metadata_path` — Path on the local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
+- `cache_enabled` — Allows caching of mark and index files on the local FS. Default value is `true`.
+- `cache_path` — Path on the local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
+- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
+
+
+S3 disk can be configured as `main` or `cold` storage:
+``` xml
+<storage_configuration>
+    ...
+    <disks>
+        <s3>
+            <type>s3</type>
+            <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
+            <access_key_id>your_access_key_id</access_key_id>
+            <secret_access_key>your_secret_access_key</secret_access_key>
+        </s3>
+    </disks>
+    <policies>
+        <s3_main>
+            <volumes>
+                <main>
+                    <disk>s3</disk>
+                </main>
+            </volumes>
+        </s3_main>
+        <s3_cold>
+            <volumes>
+                <main>
+                    <disk>default</disk>
+                </main>
+                <external>
+                    <disk>s3</disk>
+                </external>
+            </volumes>
+            <move_factor>0.2</move_factor>
+        </s3_cold>
+    </policies>
+    ...
+</storage_configuration>
+``` + +In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. + ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: From b00f01d6b1812af5c21b205f6a02b27e25defd37 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 13:03:25 +0300 Subject: [PATCH 090/697] Split filter optimization. --- src/Interpreters/ActionsDAG.cpp | 42 +++++++++++++++++++++++++ src/Interpreters/ActionsDAG.h | 19 +++++++++--- src/Processors/QueryPlan/QueryPlan.cpp | 43 ++++++++++++++++++++++---- 3 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 993986309ea..bdc233912ba 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -454,6 +454,36 @@ bool ActionsDAG::tryRestoreColumn(const std::string & column_name) return false; } +void ActionsDAG::removeUnusedInput(const std::string & column_name) +{ + auto it = inputs.begin(); + for (; it != inputs.end(); ++it) + if ((*it)->result_name == column_name) + break; + + if (it == inputs.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found input {} in ActionsDAG\n{}", column_name, dumpDAG()); + + auto * input = *it; + for (const auto & node : nodes) + for (const auto * child : node.children) + if (input == child) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot remove input {} because it has dependent nodes in ActionsDAG\n{}", + column_name, dumpDAG()); + + for (auto jt = index.begin(); jt != index.end(); ++jt) + { + if (*jt == input) + { + index.remove(jt); + break; + } + } + + inputs.erase(it); +} + ActionsDAGPtr ActionsDAG::clone() const { auto actions = cloneEmpty(); @@ -1067,4 +1097,16 @@ std::pair ActionsDAG::splitActionsBeforeArrayJoin return res; } +std::pair ActionsDAG::splitActionsForFilter(const std::string & column_name) const +{ + auto it = index.find(column_name); + if (it == index.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", + column_name, dumpDAG()); + + std::unordered_set split_nodes = {*it}; + return split(split_nodes); +} + } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 6b873eaaa26..c82496b2a8a 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -214,13 +214,13 @@ public: /// If column is not in index, try to find it in nodes and insert back into index. bool tryRestoreColumn(const std::string & column_name); + /// Find column in input. Remove it from input and index. + /// Checks that column in inputs and has not dependent nodes. + void removeUnusedInput(const std::string & column_name); void projectInput() { settings.project_input = true; } void removeUnusedActions(const Names & required_names); - /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN. - std::pair splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const; - bool hasArrayJoin() const; bool hasStatefulFunctions() const; bool empty() const; /// If actions only contain inputs. @@ -249,14 +249,25 @@ public: MatchColumnsMode mode, bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header. - /// Create ActionsDAG which represents expression equivalent to applying lhs and rhs actions consequently. 
+ /// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently. /// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression. /// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`. /// Otherwise, any two actions may be combined. static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second); + /// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children. + /// Execution of first then second parts on block is equivalent to execution of initial DAG. + /// First DAG and initial DAG have equal inputs, second DAG and initial DAG has equal index (outputs). + /// Second DAG inputs may contain less inputs then first DAG (but also include other columns). std::pair split(std::unordered_set split_nodes) const; + /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN. + std::pair splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const; + + /// Splits actions into two parts. First part has minimal size sufficient for calculation of column_name. + /// Index of initial actions must contain column_name. + std::pair splitActionsForFilter(const std::string & column_name) const; + private: Node & addNode(Node node, bool can_replace = false); Node & getNode(const std::string & name); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 9be9f6d0c0b..db38c3916fc 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -497,12 +497,13 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); } +/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep +/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node) { auto & parent = parent_node->step; auto & child = child_node->step; - /// TODO: FilterStep auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); @@ -549,6 +550,36 @@ static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * return false; } +/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes. 
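+/// Split mechanics: splitActionsForFilter() extracts the minimal sub-DAG needed to
+/// compute the filter column; that part stays in a FilterStep placed closer to the
+/// source, while the remaining actions become an ExpressionStep that only runs over
+/// rows which already passed the filter.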
+static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) +{ + auto * filter_step = typeid_cast(node->step.get()); + if (!filter_step) + return false; + + const auto & expr = filter_step->getExpression(); + auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName()); + + if (split.second->empty()) + return false; + + if (filter_step->removesFilterColumn()) + split.second->removeUnusedInput(filter_step->getFilterColumnName()); + + auto & filter_node = nodes.emplace_back(); + node->children.swap(filter_node.children); + node->children.push_back(&filter_node); + + filter_node.step = std::make_unique( + filter_node.children.at(0)->step->getOutputStream(), + std::move(split.first), + filter_step->getFilterColumnName(), + filter_step->removesFilterColumn()); + + node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); + return true; +} + void QueryPlan::optimize() { struct Frame @@ -566,12 +597,16 @@ void QueryPlan::optimize() if (frame.next_child == 0) { - /// First entrance, try push down. if (frame.node->children.size() == 1) { tryPushDownLimit(frame.node->step, frame.node->children.front()); while (tryMergeExpressions(frame.node, frame.node->children.front())); + + if (frame.node->children.size() == 1) + tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes); + + trySplitFilter(frame.node, nodes); } } @@ -582,10 +617,6 @@ void QueryPlan::optimize() } else { - /// Last entrance, try lift up. - if (frame.node->children.size() == 1) - tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes); - stack.pop(); } } From e313209a103f47f362b41653b252337e5c361774 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 19 Jan 2021 13:26:56 +0300 Subject: [PATCH 091/697] Support CHARACTER data type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Добавил поддержку типа данных CHARACTER. --- docs/en/sql-reference/ansi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/ansi.md b/docs/en/sql-reference/ansi.md index 5ca216d11fa..84e47902f3b 100644 --- a/docs/en/sql-reference/ansi.md +++ b/docs/en/sql-reference/ansi.md @@ -31,7 +31,7 @@ The following table lists cases when query feature works in ClickHouse, but beha | E011-05 | Numeric comparison | Yes {.text-success} | | | E011-06 | Implicit casting among the numeric data types | No {.text-danger} | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast | | **E021** | **Character string types** | **Partial**{.text-warning} | | -| E021-01 | CHARACTER data type | No {.text-danger} | | +| E021-01 | CHARACTER data type | Yes {.text-danger} | | | E021-02 | CHARACTER VARYING data type | Yes {.text-danger} | | | E021-03 | Character literals | Partial {.text-warning} | No automatic concatenation of consecutive literals and character set support | | E021-04 | CHARACTER_LENGTH function | Partial {.text-warning} | No `USING` clause | From b0b3cfbd028b9beb3e6e2c3855702c4dba7b2e93 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 14:48:09 +0300 Subject: [PATCH 092/697] Split filter optimization. 
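
For intuition about what this series of commits is building towards: after the split, the cheap predicate is evaluated and applied first, and expensive expressions are computed only for rows that survive the filter. A minimal standalone sketch of that effect (plain C++; `expensiveColumn` and the `& 1024` predicate are invented stand-ins, not the actual ActionsDAG machinery):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

/// Stand-in for a heavy expression such as sipHash64(number).
static uint64_t expensiveColumn(uint64_t x)
{
    uint64_t h = x;
    for (int i = 0; i < 64; ++i)
        h = h * 6364136223846793005ULL + 1442695040888963407ULL;
    return h;
}

int main()
{
    std::vector<uint64_t> numbers(1000 * 1000);
    for (uint64_t i = 0; i < numbers.size(); ++i)
        numbers[i] = i;

    /// Without the split: the heavy column and the predicate live in one
    /// FilterStep, so the heavy expression is computed for every row.
    uint64_t sum_unsplit = 0;
    for (uint64_t n : numbers)
    {
        uint64_t x = expensiveColumn(n);
        if ((n & 1024) == 0)
            sum_unsplit += x;
    }

    /// With the split: a small FilterStep evaluates only the cheap predicate,
    /// and the remaining ExpressionStep runs on the rows that passed.
    uint64_t sum_split = 0;
    for (uint64_t n : numbers)
        if ((n & 1024) == 0)
            sum_split += expensiveColumn(n);

    std::cout << (sum_unsplit == sum_split ? "equal" : "different") << std::endl;
    return 0;
}
```
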
--- src/Interpreters/ActionsDAG.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index bdc233912ba..60443e7656b 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -481,6 +481,15 @@ void ActionsDAG::removeUnusedInput(const std::string & column_name) } } + for (auto jt = nodes.begin(); jt != nodes.end(); ++jt) + { + if (&(*jt) == input) + { + nodes.erase(jt); + break; + } + } + inputs.erase(it); } From 91403b2f75fa0b589d82c4ad29523f37efa4025c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 15:04:45 +0300 Subject: [PATCH 093/697] Split filter optimization. --- src/Processors/QueryPlan/QueryPlan.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index db38c3916fc..dc33c96700e 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -617,6 +617,10 @@ void QueryPlan::optimize() } else { + trySplitFilter(frame.node, nodes); + + while (tryMergeExpressions(frame.node, frame.node->children.front())); + stack.pop(); } } From 0711957fbb96dedf1b8da968cad0903dff7215a4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 15:08:21 +0300 Subject: [PATCH 094/697] Split filter optimization. --- src/Processors/QueryPlan/QueryPlan.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index dc33c96700e..97934c8f500 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -617,9 +617,12 @@ void QueryPlan::optimize() } else { - trySplitFilter(frame.node, nodes); + if (frame.node->children.size() == 1) + { + while (tryMergeExpressions(frame.node, frame.node->children.front())); - while (tryMergeExpressions(frame.node, frame.node->children.front())); + trySplitFilter(frame.node, nodes); + } stack.pop(); } From 0246e3eacef7bfb9a6b25adb774a4f2d7dd22b99 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 15:51:53 +0300 Subject: [PATCH 095/697] Added perftest. 
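
The `optimize()` changes in the commits above apply the step rewrites while walking the plan tree with an explicit stack, rewriting single-child nodes both on the way down and on the way back up. A condensed standalone sketch of that traversal pattern (plain C++; `Node` and the `rewrite` hook are simplified stand-ins for the real plan nodes and optimizations):

```cpp
#include <functional>
#include <stack>
#include <vector>

struct Node
{
    std::vector<Node *> children;
};

/// Walk the tree with an explicit stack and run `rewrite` on every single-child
/// node twice: on first entrance (top-down) and again after its subtree is done
/// (bottom-up), the same shape of loop the optimizations above are driven by.
static void optimizeTree(Node & root, const std::function<void(Node &)> & rewrite)
{
    struct Frame { Node * node; size_t next_child = 0; };
    std::stack<Frame> stack;
    stack.push(Frame{&root});

    while (!stack.empty())
    {
        auto & frame = stack.top();

        if (frame.next_child == 0 && frame.node->children.size() == 1)
            rewrite(*frame.node);                       /// first entrance

        if (frame.next_child < frame.node->children.size())
        {
            Node * child = frame.node->children[frame.next_child];
            ++frame.next_child;
            stack.push(Frame{child});
        }
        else
        {
            if (frame.node->children.size() == 1)
                rewrite(*frame.node);                   /// last entrance
            stack.pop();
        }
    }
}

int main()
{
    Node leaf, mid, root;
    mid.children = {&leaf};
    root.children = {&mid};

    int rewrites = 0;
    optimizeTree(root, [&](Node &) { ++rewrites; });
    return rewrites == 4 ? 0 : 1;
}
```
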
--- tests/performance/split_filter.xml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/performance/split_filter.xml diff --git a/tests/performance/split_filter.xml b/tests/performance/split_filter.xml new file mode 100644 index 00000000000..7bd4af51abd --- /dev/null +++ b/tests/performance/split_filter.xml @@ -0,0 +1,4 @@ + + select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000)) where y = 0 settings enable_optimize_predicate_expression=0 + select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0 + From 9db2974aaae5b3cca4129f6ed4b8cfeacf846c34 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 15:54:55 +0300 Subject: [PATCH 096/697] Update explain for filter --- src/Processors/QueryPlan/FilterStep.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 8fb405e685b..921c1351511 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -83,7 +83,11 @@ void FilterStep::transformPipeline(QueryPipeline & pipeline) void FilterStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, ' '); - settings.out << prefix << "Filter column: " << filter_column_name << '\n'; + settings.out << prefix << "Filter column: " << filter_column_name; + + if (remove_filter_column) + settings.out << " (removed)"; + settings.out << '\n'; bool first = true; auto expression = std::make_shared(actions_dag); @@ -94,6 +98,11 @@ void FilterStep::describeActions(FormatSettings & settings) const first = false; settings.out << action.toString() << '\n'; } + + settings.out << prefix << "Positions:"; + for (const auto & pos : expression->getResultPositions()) + settings.out << ' ' << pos; + settings.out << '\n'; } } From b1c7944f843626543a37d39ad4772a31c4020959 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 16:08:14 +0300 Subject: [PATCH 097/697] Fix description after step optimizations --- src/Processors/QueryPlan/QueryPlan.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 97934c8f500..da659b78ce1 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -457,6 +457,8 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * if (split_actions.first->empty()) return; + auto description = parent->getStepDescription(); + /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin. 
if (split_actions.second->empty()) { @@ -475,6 +477,8 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); + child->setStepDescription(std::move(description)); + array_join_step->updateInputStream(child->getOutputStream(), expected_header); return; } @@ -488,6 +492,7 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * node.step = std::make_unique(node.children.at(0)->step->getOutputStream(), std::move(split_actions.first)); + node.step->setStepDescription(description); array_join_step->updateInputStream(node.step->getOutputStream(), {}); if (expression_step) @@ -495,6 +500,8 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * else parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second, filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); + + parent->setStepDescription(description + " [split]"); } /// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep @@ -577,6 +584,10 @@ static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) filter_step->removesFilterColumn()); node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); + + filter_node.step->setStepDescription(filter_step->getStepDescription() + " [split]"); + node->step->setStepDescription(filter_step->getStepDescription() + " [split]"); + return true; } From 964af8e02c88f4adcffc224fe4fdca0d0f2ca1b3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 16:17:52 +0300 Subject: [PATCH 098/697] Fix description after step optimizations --- src/Processors/QueryPlan/QueryPlan.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index da659b78ce1..ba64e644c5f 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -529,7 +529,7 @@ static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); auto expr = std::make_unique(child_expr->getInputStreams().front(), merged); - expr->setStepDescription(parent_expr->getStepDescription() + " + " + child_expr->getStepDescription()); + expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); parent_node->step = std::move(expr); parent_node->children.swap(child_node->children); @@ -547,7 +547,7 @@ static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * auto filter = std::make_unique(child_expr->getInputStreams().front(), merged, parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn()); - filter->setStepDescription(parent_filter->getStepDescription() + " + " + child_expr->getStepDescription()); + filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); parent_node->step = std::move(filter); parent_node->children.swap(child_node->children); @@ -585,8 +585,8 @@ static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); - filter_node.step->setStepDescription(filter_step->getStepDescription() + " [split]"); - node->step->setStepDescription(filter_step->getStepDescription() + " [split]"); + 
filter_node.step->setStepDescription("(" + filter_step->getStepDescription() + ")[split]"); + node->step->setStepDescription(filter_step->getStepDescription()); return true; } From 35f48e60ad2f79045586b3caa70d36ab07bf49de Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 16:21:28 +0300 Subject: [PATCH 099/697] Fix description after step optimizations --- src/Processors/QueryPlan/QueryPlan.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index ba64e644c5f..d393dbb604f 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -573,6 +573,8 @@ static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) if (filter_step->removesFilterColumn()) split.second->removeUnusedInput(filter_step->getFilterColumnName()); + auto description = filter_step->getStepDescription(); + auto & filter_node = nodes.emplace_back(); node->children.swap(filter_node.children); node->children.push_back(&filter_node); @@ -585,7 +587,7 @@ static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); - filter_node.step->setStepDescription("(" + filter_step->getStepDescription() + ")[split]"); + filter_node.step->setStepDescription("(" + description + ")[split]"); node->step->setStepDescription(filter_step->getStepDescription()); return true; From 17edf238e3ff3eea2eb03f5b4016c287a5ec093a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 16:35:26 +0300 Subject: [PATCH 100/697] Added test. --- .../0_stateless/01655_plan_optimizations.reference | 11 +++++++++++ tests/queries/0_stateless/01655_plan_optimizations.sh | 10 ++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/01655_plan_optimizations.reference create mode 100755 tests/queries/0_stateless/01655_plan_optimizations.sh diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference new file mode 100644 index 00000000000..fda40305f9d --- /dev/null +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -0,0 +1,11 @@ +sipHash should be calculated after filtration +FUNCTION sipHash64 +Filter column: equals +sorting steps should know about limit +Limit 10 +MergingSorted +Limit 10 +MergeSorting +Limit 10 +PartialSorting +Limit 10 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh new file mode 100755 index 00000000000..4f3541f9dde --- /dev/null +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "sipHash should be calculated after filtration" +$CLICKHOUSE_CLIENT -q "explain actions = 1 select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0" | grep -o "FUNCTION sipHash64\|Filter column: equals" +echo "sorting steps should know about limit" +$CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number from numbers(500000000) order by -number) limit 10" | grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Limit 10" From ac64a1339290be826e10bfa454897f825c87457c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 17:22:28 +0300 Subject: [PATCH 101/697] Split storage and requests processing --- src/Common/ZooKeeper/TestKeeperStorage.cpp | 325 +++++++----------- src/Common/ZooKeeper/TestKeeperStorage.h | 70 ++-- .../ZooKeeper/TestKeeperStorageDispatcher.cpp | 131 +++++++ .../ZooKeeper/TestKeeperStorageDispatcher.h | 58 ++++ src/Interpreters/Context.cpp | 8 +- src/Interpreters/Context.h | 4 +- src/Server/TestKeeperTCPHandler.cpp | 166 ++++----- src/Server/TestKeeperTCPHandler.h | 14 +- 8 files changed, 410 insertions(+), 366 deletions(-) create mode 100644 src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp create mode 100644 src/Common/ZooKeeper/TestKeeperStorageDispatcher.h diff --git a/src/Common/ZooKeeper/TestKeeperStorage.cpp b/src/Common/ZooKeeper/TestKeeperStorage.cpp index daadba6519e..4f1300cde8c 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.cpp +++ b/src/Common/ZooKeeper/TestKeeperStorage.cpp @@ -39,8 +39,9 @@ static String baseName(const String & path) return path.substr(rslash_pos + 1); } -static void processWatchesImpl(const String & path, TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches, Coordination::Event event_type) +static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches, Coordination::Event event_type) { + TestKeeperStorage::ResponsesForSessions result; auto it = watches.find(path); if (it != watches.end()) { @@ -50,9 +51,8 @@ static void processWatchesImpl(const String & path, TestKeeperStorage::Watches & watch_response->zxid = -1; watch_response->type = event_type; watch_response->state = Coordination::State::CONNECTED; - for (auto & watcher : it->second) - if (watcher.watch_callback) - watcher.watch_callback(watch_response); + for (auto watcher_session : it->second) + result.push_back(TestKeeperStorage::ResponseForSession{watcher_session, watch_response}); watches.erase(it); } @@ -67,19 +67,17 @@ static void processWatchesImpl(const String & path, TestKeeperStorage::Watches & watch_list_response->zxid = -1; watch_list_response->type = Coordination::Event::CHILD; watch_list_response->state = Coordination::State::CONNECTED; - for (auto & watcher : it->second) - if (watcher.watch_callback) - watcher.watch_callback(watch_list_response); + for (auto watcher_session : it->second) + result.push_back(TestKeeperStorage::ResponseForSession{watcher_session, watch_list_response}); list_watches.erase(it); } + return result; } TestKeeperStorage::TestKeeperStorage() { container.emplace("/", Node()); - - processing_thread = ThreadFromGlobalPool([this] { processingThread(); }); } using Undo = std::function; @@ -92,7 +90,7 @@ struct TestKeeperStorageRequest : zk_request(zk_request_) {} virtual std::pair process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & 
ephemerals, int64_t zxid, int64_t session_id) const = 0; - virtual void processWatches(TestKeeperStorage::Watches & /*watches*/, TestKeeperStorage::Watches & /*list_watches*/) const {} + virtual TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & /*watches*/, TestKeeperStorage::Watches & /*list_watches*/) const { return {}; } virtual ~TestKeeperStorageRequest() = default; }; @@ -111,9 +109,9 @@ struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest { using TestKeeperStorageRequest::TestKeeperStorageRequest; - void processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override + TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override { - processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); + return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); } std::pair process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override @@ -271,9 +269,9 @@ struct TestKeeperStorageRemoveRequest final : public TestKeeperStorageRequest return { response_ptr, undo }; } - void processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override + TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override { - processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); + return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); } }; @@ -344,9 +342,9 @@ struct TestKeeperStorageSetRequest final : public TestKeeperStorageRequest return { response_ptr, undo }; } - void processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override + TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override { - processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); + return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } }; @@ -502,10 +500,15 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest } } - void processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override + TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override { + TestKeeperStorage::ResponsesForSessions result; for (const auto & generic_request : concrete_requests) - generic_request->processWatches(watches, list_watches); + { + auto responses = generic_request->processWatches(watches, list_watches); + result.insert(result.end(), responses.begin(), responses.end()); + } + return result; } }; @@ -518,160 +521,49 @@ struct TestKeeperStorageCloseRequest final : public TestKeeperStorageRequest } }; -void TestKeeperStorage::processingThread() +TestKeeperStorage::ResponsesForSessions TestKeeperStorage::finalize(const RequestsForSessions & expired_requests) { - setThreadName("TestKeeperSProc"); + if (finalized) + throw DB::Exception("Testkeeper storage already finalized", ErrorCodes::LOGICAL_ERROR); - try + 
finalized = true; + + ResponsesForSessions finalize_results; + auto finish_watch = [] (const auto & watch_pair) -> ResponsesForSessions { - while (!shutdown) - { - RequestInfo info; + ResponsesForSessions results; + std::shared_ptr response = std::make_shared(); + response->type = Coordination::SESSION; + response->state = Coordination::EXPIRED_SESSION; + response->error = Coordination::Error::ZSESSIONEXPIRED; - UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds()); + for (auto & watcher_session : watch_pair.second) + results.push_back(ResponseForSession{watcher_session, response}); + return results; + }; - if (requests_queue.tryPop(info, max_wait)) - { - if (shutdown) - break; - - auto zk_request = info.request->zk_request; - if (zk_request->getOpNum() == Coordination::OpNum::Close) - { - auto it = ephemerals.find(info.session_id); - if (it != ephemerals.end()) - { - for (const auto & ephemeral_path : it->second) - { - container.erase(ephemeral_path); - processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); - } - ephemerals.erase(it); - } - clearDeadWatches(info.session_id); - - /// Finish connection - auto response = std::make_shared(); - response->xid = zk_request->xid; - response->zxid = getZXID(); - info.response_callback(response); - } - else - { - auto [response, _] = info.request->process(container, ephemerals, zxid, info.session_id); - - if (info.watch_callback) - { - if (response->error == Coordination::Error::ZOK) - { - auto & watches_type = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList - ? list_watches - : watches; - - watches_type[zk_request->getPath()].emplace_back(Watcher{info.session_id, info.watch_callback}); - sessions_and_watchers[info.session_id].emplace(zk_request->getPath()); - } - else if (response->error == Coordination::Error::ZNONODE && zk_request->getOpNum() == Coordination::OpNum::Exists) - { - watches[zk_request->getPath()].emplace_back(Watcher{info.session_id, info.watch_callback}); - sessions_and_watchers[info.session_id].emplace(zk_request->getPath()); - } - else - { - std::shared_ptr watch_response = std::make_shared(); - watch_response->path = zk_request->getPath(); - watch_response->xid = -1; - watch_response->error = response->error; - watch_response->type = Coordination::Event::NOTWATCHING; - info.watch_callback(watch_response); - } - } - - if (response->error == Coordination::Error::ZOK) - info.request->processWatches(watches, list_watches); - - response->xid = zk_request->xid; - response->zxid = getZXID(); - - info.response_callback(response); - } - } - } - } - catch (...) 
+ for (auto & path_watch : watches) { - tryLogCurrentException(__PRETTY_FUNCTION__); - finalize(); - } -} - - -void TestKeeperStorage::finalize() -{ - { - std::lock_guard lock(push_request_mutex); - - if (shutdown) - return; - - shutdown = true; - - if (processing_thread.joinable()) - processing_thread.join(); + auto watch_responses = finish_watch(path_watch); + finalize_results.insert(finalize_results.end(), watch_responses.begin(), watch_responses.end()); } - try + watches.clear(); + for (auto & path_watch : list_watches) { - { - auto finish_watch = [] (const auto & watch_pair) - { - Coordination::ZooKeeperWatchResponse response; - response.type = Coordination::SESSION; - response.state = Coordination::EXPIRED_SESSION; - response.error = Coordination::Error::ZSESSIONEXPIRED; + auto list_watch_responses = finish_watch(path_watch); + finalize_results.insert(finalize_results.end(), list_watch_responses.begin(), list_watch_responses.end()); + } + list_watches.clear(); + sessions_and_watchers.clear(); - for (auto & watcher : watch_pair.second) - { - if (watcher.watch_callback) - { - try - { - watcher.watch_callback(std::make_shared(response)); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - }; - for (auto & path_watch : watches) - finish_watch(path_watch); - watches.clear(); - for (auto & path_watch : list_watches) - finish_watch(path_watch); - list_watches.clear(); - sessions_and_watchers.clear(); - } - RequestInfo info; - while (requests_queue.tryPop(info)) - { - auto response = info.request->zk_request->makeResponse(); - response->error = Coordination::Error::ZSESSIONEXPIRED; - try - { - info.response_callback(response); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - catch (...) + for (const auto & [session_id, zk_request] : expired_requests) { - tryLogCurrentException(__PRETTY_FUNCTION__); + auto response = zk_request->makeResponse(); + response->error = Coordination::Error::ZSESSIONEXPIRED; + finalize_results.push_back(ResponseForSession{session_id, response}); } + return finalize_results; } @@ -731,55 +623,80 @@ TestKeeperWrapperFactory::TestKeeperWrapperFactory() registerTestKeeperRequestWrapper(*this); } -void TestKeeperStorage::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, ResponseCallback callback) + +TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id) { - TestKeeperStorageRequestPtr storage_request = TestKeeperWrapperFactory::instance().get(request); - RequestInfo request_info; - request_info.time = clock::now(); - request_info.request = storage_request; - request_info.session_id = session_id; - request_info.response_callback = callback; - - std::lock_guard lock(push_request_mutex); - /// Put close requests without timeouts - if (request->getOpNum() == Coordination::OpNum::Close) - requests_queue.push(std::move(request_info)); - else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds())) - throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED); - -} - -void TestKeeperStorage::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, ResponseCallback callback, ResponseCallback watch_callback) -{ - TestKeeperStorageRequestPtr storage_request = TestKeeperWrapperFactory::instance().get(request); - RequestInfo request_info; - request_info.time = clock::now(); - 
request_info.request = storage_request; - request_info.session_id = session_id; - request_info.response_callback = callback; - if (request->has_watch) - request_info.watch_callback = watch_callback; - - std::lock_guard lock(push_request_mutex); - /// Put close requests without timeouts - if (request->getOpNum() == Coordination::OpNum::Close) - requests_queue.push(std::move(request_info)); - else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds())) - throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED); -} - -TestKeeperStorage::~TestKeeperStorage() -{ - try + TestKeeperStorage::ResponsesForSessions results; + if (zk_request->getOpNum() == Coordination::OpNum::Close) { - finalize(); + auto it = ephemerals.find(session_id); + if (it != ephemerals.end()) + { + for (const auto & ephemeral_path : it->second) + { + container.erase(ephemeral_path); + auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); + results.insert(results.end(), responses.begin(), responses.end()); + } + ephemerals.erase(it); + } + clearDeadWatches(session_id); + + /// Finish connection + auto response = std::make_shared(); + response->xid = zk_request->xid; + response->zxid = getZXID(); + results.push_front(ResponseForSession{session_id, response}); } - catch (...) + else { - tryLogCurrentException(__PRETTY_FUNCTION__); + + TestKeeperStorageRequestPtr storage_request = TestKeeperWrapperFactory::instance().get(zk_request); + auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id); + + if (zk_request->has_watch) + { + if (response->error == Coordination::Error::ZOK) + { + auto & watches_type = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList + ? 
list_watches + : watches; + + watches_type[zk_request->getPath()].emplace_back(session_id); + sessions_and_watchers[session_id].emplace(zk_request->getPath()); + } + else if (response->error == Coordination::Error::ZNONODE && zk_request->getOpNum() == Coordination::OpNum::Exists) + { + watches[zk_request->getPath()].emplace_back(session_id); + sessions_and_watchers[session_id].emplace(zk_request->getPath()); + } + else + { + std::shared_ptr watch_response = std::make_shared(); + watch_response->path = zk_request->getPath(); + watch_response->xid = -1; + watch_response->error = response->error; + watch_response->type = Coordination::Event::NOTWATCHING; + results.push_back(ResponseForSession{session_id, watch_response}); + } + } + + if (response->error == Coordination::Error::ZOK) + { + auto watch_responses = storage_request->processWatches(watches, list_watches); + results.insert(results.end(), watch_responses.begin(), watch_responses.end()); + } + + response->xid = zk_request->xid; + response->zxid = getZXID(); + + results.push_front(ResponseForSession{session_id, response}); } + + return results; } + void TestKeeperStorage::clearDeadWatches(int64_t session_id) { auto watches_it = sessions_and_watchers.find(session_id); @@ -793,7 +710,7 @@ void TestKeeperStorage::clearDeadWatches(int64_t session_id) auto & watches_for_path = watch->second; for (auto w_it = watches_for_path.begin(); w_it != watches_for_path.end();) { - if (w_it->session_id == session_id) + if (*w_it == session_id) w_it = watches_for_path.erase(w_it); else ++w_it; @@ -808,7 +725,7 @@ void TestKeeperStorage::clearDeadWatches(int64_t session_id) auto & list_watches_for_path = list_watch->second; for (auto w_it = list_watches_for_path.begin(); w_it != list_watches_for_path.end();) { - if (w_it->session_id == session_id) + if (*w_it == session_id) w_it = list_watches_for_path.erase(w_it); else ++w_it; diff --git a/src/Common/ZooKeeper/TestKeeperStorage.h b/src/Common/ZooKeeper/TestKeeperStorage.h index afb0a7add82..5afa5032bcf 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.h +++ b/src/Common/ZooKeeper/TestKeeperStorage.h @@ -4,9 +4,9 @@ #include #include #include -#include #include #include +#include namespace zkutil { @@ -18,10 +18,7 @@ using ResponseCallback = std::function session_id_counter{0}; struct Node @@ -34,71 +31,58 @@ public: int32_t seq_num = 0; }; - struct Watcher + struct ResponseForSession { int64_t session_id; - ResponseCallback watch_callback; + Coordination::ZooKeeperResponsePtr response; }; + using ResponsesForSessions = std::deque; + + struct RequestForSession + { + int64_t session_id; + Coordination::ZooKeeperRequestPtr request; + }; + + using RequestsForSessions = std::deque; + using Container = std::map; using Ephemerals = std::unordered_map>; using SessionAndWatcher = std::unordered_map>; + using SessionIDs = std::vector; - using WatchCallbacks = std::vector; - using Watches = std::map; + using Watches = std::map; Container container; Ephemerals ephemerals; SessionAndWatcher sessions_and_watchers; std::atomic zxid{0}; - std::atomic shutdown{false}; + std::atomic finalized{false}; Watches watches; Watches list_watches; /// Watches for 'list' request (watches on children). 
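+    /// Both maps go from path to the ids of the sessions watching it; the storage only
+    /// produces ResponsesForSessions, and the dispatcher delivers each response through
+    /// its per-session callback.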
- using clock = std::chrono::steady_clock; - - struct RequestInfo - { - TestKeeperStorageRequestPtr request; - ResponseCallback response_callback; - ResponseCallback watch_callback; - clock::time_point time; - int64_t session_id; - }; - - std::mutex push_request_mutex; - using RequestsQueue = ConcurrentBoundedQueue; - RequestsQueue requests_queue{1}; - - void finalize(); - - ThreadFromGlobalPool processing_thread; - - void processingThread(); void clearDeadWatches(int64_t session_id); -public: - using AsyncResponse = std::future; - TestKeeperStorage(); - ~TestKeeperStorage(); - struct ResponsePair + int64_t getZXID() { - AsyncResponse response; - std::optional watch_response; - }; - void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, ResponseCallback callback); - void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, ResponseCallback callback, ResponseCallback watch_callback); + return zxid.fetch_add(1); + } + +public: + TestKeeperStorage(); + + ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id); + ResponsesForSessions finalize(const RequestsForSessions & expired_requests); int64_t getSessionID() { return session_id_counter.fetch_add(1); } - int64_t getZXID() - { - return zxid.fetch_add(1); - } + + }; } diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp new file mode 100644 index 00000000000..b1233fc47e3 --- /dev/null +++ b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp @@ -0,0 +1,131 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + + extern const int LOGICAL_ERROR; + extern const int TIMEOUT_EXCEEDED; +} + +} +namespace zkutil +{ + +void TestKeeperStorageDispatcher::processingThread() +{ + setThreadName("TestKeeperSProc"); + try + { + while (!shutdown) + { + RequestInfo info; + + UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds()); + + if (requests_queue.tryPop(info, max_wait)) + { + if (shutdown) + break; + + auto responses = storage.processRequest(info.request, info.session_id); + for (const auto & response_for_session : responses) + setResponse(response_for_session.session_id, response_for_session.response); + } + } + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + finalize(); + } +} + +void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) +{ + std::lock_guard lock(session_to_response_callback_mutex); + auto session_writer = session_to_response_callback.find(session_id); + if (session_writer == session_to_response_callback.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id); + + session_writer->second(response); + /// Session closed, no more writes + if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close) + session_to_response_callback.erase(session_writer); +} + +void TestKeeperStorageDispatcher::finalize() +{ + { + std::lock_guard lock(push_request_mutex); + + if (shutdown) + return; + + shutdown = true; + + if (processing_thread.joinable()) + processing_thread.join(); + } + + RequestInfo info; + TestKeeperStorage::RequestsForSessions expired_requests; + while (requests_queue.tryPop(info)) + expired_requests.push_back(TestKeeperStorage::RequestForSession{info.session_id, info.request}); + + auto expired_responses = storage.finalize(expired_requests); + + for (const auto & response_for_session : expired_responses) + setResponse(response_for_session.session_id, response_for_session.response); +} + +void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id) +{ + + { + std::lock_guard lock(session_to_response_callback_mutex); + if (session_to_response_callback.count(session_id) == 0) + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id); + } + + RequestInfo request_info; + request_info.time = clock::now(); + request_info.request = request; + request_info.session_id = session_id; + + std::lock_guard lock(push_request_mutex); + /// Put close requests without timeouts + if (request->getOpNum() == Coordination::OpNum::Close) + requests_queue.push(std::move(request_info)); + else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds())) + throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED); +} + +TestKeeperStorageDispatcher::TestKeeperStorageDispatcher() +{ + processing_thread = ThreadFromGlobalPool([this] { processingThread(); }); +} + +TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher() +{ + try + { + finalize(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void TestKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback) +{ + std::lock_guard lock(session_to_response_callback_mutex); + if (!session_to_response_callback.try_emplace(session_id, callback).second) + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id); +} + +} diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h new file mode 100644 index 00000000000..27abf17ac73 --- /dev/null +++ b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include + +namespace zkutil +{ + +using ZooKeeperResponseCallback = std::function; + +class TestKeeperStorageDispatcher +{ +private: + Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000}; + + using clock = std::chrono::steady_clock; + + struct RequestInfo + { + Coordination::ZooKeeperRequestPtr request; + clock::time_point time; + int64_t session_id; + }; + + std::mutex push_request_mutex; + + using RequestsQueue = ConcurrentBoundedQueue; + RequestsQueue requests_queue{1}; + std::atomic shutdown{false}; + using SessionToResponseCallback = std::unordered_map; + + std::mutex session_to_response_callback_mutex; + SessionToResponseCallback session_to_response_callback; + + ThreadFromGlobalPool processing_thread; + + TestKeeperStorage storage; + +private: + void processingThread(); + void finalize(); + void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); + +public: + TestKeeperStorageDispatcher(); + ~TestKeeperStorageDispatcher(); + + void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id); + int64_t getSessionID() + { + return storage.getSessionID(); + } + void registerSession(int64_t session_id, ZooKeeperResponseCallback callback); +}; + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2a8fdce869b..ea10024b3cb 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -306,7 +306,7 @@ struct ContextShared ConfigurationPtr zookeeper_config; /// Stores zookeeper configs mutable std::mutex test_keeper_storage_mutex; - mutable std::shared_ptr test_keeper_storage; + mutable std::shared_ptr test_keeper_storage; mutable std::mutex auxiliary_zookeepers_mutex; mutable std::map auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients. 
ConfigurationPtr auxiliary_zookeepers_config; /// Stores auxiliary zookeepers configs @@ -1531,11 +1531,11 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const return shared->zookeeper; } -std::shared_ptr & Context::getTestKeeperStorage() const +std::shared_ptr & Context::getTestKeeperStorage() const { std::lock_guard lock(shared->test_keeper_storage_mutex); if (!shared->test_keeper_storage) - shared->test_keeper_storage = std::make_shared(); + shared->test_keeper_storage = std::make_shared(); return shared->test_keeper_storage; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 79140f0d209..dc8efb058e7 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -40,7 +40,7 @@ namespace Poco namespace zkutil { class ZooKeeper; - class TestKeeperStorage; + class TestKeeperStorageDispatcher; } @@ -513,7 +513,7 @@ public: std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; - std::shared_ptr & getTestKeeperStorage() const; + std::shared_ptr & getTestKeeperStorage() const; /// Set auxiliary zookeepers configuration at server starting or configuration reloading. void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config); diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index aeb7da038b7..e81a2e9ef99 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes struct PollResult { - std::vector ready_responses; + bool has_responses; bool has_requests; bool error; }; @@ -162,10 +162,10 @@ struct SocketInterruptablePollWrapper { do { - size_t response_position; - readIntBinary(response_position, response_in); - result.ready_responses.push_back(response_position); - } while (response_in.available()); + UInt8 response_byte; + readIntBinary(response_byte, response_in); + result.has_responses = true; + } while (response_in.available()); /// Just to drain all of them } } } @@ -186,11 +186,12 @@ TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::S , server(server_) , log(&Poco::Logger::get("TestKeeperTCPHandler")) , global_context(server.context()) - , test_keeper_storage(global_context.getTestKeeperStorage()) + , test_keeper_storage_dispatcher(global_context.getTestKeeperStorage()) , operation_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) , session_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) - , session_id(test_keeper_storage->getSessionID()) + , session_id(test_keeper_storage_dispatcher->getSessionID()) , poll_wrapper(std::make_unique(socket_)) + , responses(1000) { } @@ -278,6 +279,16 @@ void TestKeeperTCPHandler::runImpl() } sendHandshake(); + + auto response_fd = poll_wrapper->getResponseFD(); + auto response_callback = [this, response_fd] (const Coordination::ZooKeeperResponsePtr & response) + { + responses.push(response); + UInt8 single_byte = 1; + [[maybe_unused]] int result = write(response_fd, &single_byte, sizeof(single_byte)); + }; + test_keeper_storage_dispatcher->registerSession(session_id, response_callback); + session_stopwatch.start(); bool close_received = false; try @@ -291,27 +302,12 @@ void TestKeeperTCPHandler::runImpl() { do { - Coordination::OpNum received_op = receiveRequest(); + auto [received_op, received_xid] = receiveRequest(); if (received_op == Coordination::OpNum::Close) { - 
auto last_response = responses.find(response_id_counter - 1); - if (last_response == responses.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Just inserted response #{} not found in responses", response_id_counter - 1); - LOG_DEBUG(log, "Received close request for session #{}", session_id); - if (last_response->second.wait_for(std::chrono::microseconds(operation_timeout.totalMicroseconds())) != std::future_status::ready) - { - LOG_DEBUG(log, "Cannot sent close for session #{}", session_id); - } - else - { - LOG_DEBUG(log, "Sent close for session #{}", session_id); - last_response->second.get()->write(*out); - } - - close_received = true; - - break; + LOG_DEBUG(log, "Received close event with xid {} for session id #{}", received_xid, session_id); + close_xid = received_xid; } else if (received_op == Coordination::OpNum::Heartbeat) { @@ -322,44 +318,36 @@ void TestKeeperTCPHandler::runImpl() while (in->available()); } + if (result.has_responses) + { + Coordination::ZooKeeperResponsePtr response; + while (responses.tryPop(response)) + { + if (response->xid == close_xid) + { + close_received = true; + break; + } + + if (response->error == Coordination::Error::ZOK) + response->write(*out); + else if (response->xid != Coordination::WATCH_XID) + response->write(*out); + /// skipping bad response for watch + } + } + if (close_received) break; - for (size_t response_id : result.ready_responses) - { - auto response_future = responses.find(response_id); - if (response_future == responses.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get unknown response #{}", response_id); - - if (response_future->second.wait_for(0s) != std::future_status::ready) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Response #{} was market as ready but corresponding future not ready yet", response_id); - - auto response = response_future->second.get(); - if (response->error == Coordination::Error::ZOK) - { - response->write(*out); - } - else - { - /// TODO Get rid of this - if (!dynamic_cast(response.get())) - response->write(*out); - } - responses.erase(response_future); - } - if (result.error) throw Exception("Exception happened while reading from socket", ErrorCodes::SYSTEM_ERROR); if (session_stopwatch.elapsedMicroseconds() > static_cast(session_timeout.totalMicroseconds())) { LOG_DEBUG(log, "Session #{} expired", session_id); - auto response = putCloseRequest(); - if (response.wait_for(std::chrono::microseconds(operation_timeout.totalMicroseconds())) != std::future_status::ready) + if (!finish()) LOG_DEBUG(log, "Cannot sent close for expired session #{}", session_id); - else - response.get()->write(*out); - break; } } @@ -367,29 +355,33 @@ void TestKeeperTCPHandler::runImpl() catch (const Exception & ex) { LOG_INFO(log, "Got exception processing session #{}: {}", session_id, getExceptionMessage(ex, true)); - auto response = putCloseRequest(); - if (response.wait_for(std::chrono::microseconds(operation_timeout.totalMicroseconds())) != std::future_status::ready) + if (!finish()) LOG_DEBUG(log, "Cannot sent close for session #{}", session_id); - else - response.get()->write(*out); } } -zkutil::TestKeeperStorage::AsyncResponse TestKeeperTCPHandler::putCloseRequest() +bool TestKeeperTCPHandler::finish() { Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); - request->xid = Coordination::CLOSE_XID; - auto promise = std::make_shared>(); - zkutil::ResponseCallback callback = [promise] (const Coordination::ZooKeeperResponsePtr & 
response) + request->xid = close_xid; + test_keeper_storage_dispatcher->putRequest(request, session_id); + + Coordination::ZooKeeperResponsePtr response; + bool finished = false; + while (responses.tryPop(response, operation_timeout.totalMilliseconds())) { - promise->set_value(response); - }; - test_keeper_storage->putRequest(request, session_id, callback); - return promise->get_future(); + if (response->xid == close_xid) + { + finished = true; + response->write(*out); + break; + } + } + return finished; } -Coordination::OpNum TestKeeperTCPHandler::receiveRequest() +std::pair TestKeeperTCPHandler::receiveRequest() { int32_t length; Coordination::read(length, *in); @@ -402,47 +394,9 @@ Coordination::OpNum TestKeeperTCPHandler::receiveRequest() Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(opnum); request->xid = xid; request->readImpl(*in); - auto promise = std::make_shared>(); - if (opnum != Coordination::OpNum::Close) - { - int response_fd = poll_wrapper->getResponseFD(); - size_t response_num = response_id_counter++; - zkutil::ResponseCallback callback = [response_fd, promise, response_num] (const Coordination::ZooKeeperResponsePtr & response) - { - promise->set_value(response); - [[maybe_unused]] int result = write(response_fd, &response_num, sizeof(response_num)); - }; - if (request->has_watch) - { - auto watch_promise = std::make_shared>(); - size_t watch_response_num = response_id_counter++; - zkutil::ResponseCallback watch_callback = [response_fd, watch_promise, watch_response_num] (const Coordination::ZooKeeperResponsePtr & response) - { - watch_promise->set_value(response); - [[maybe_unused]] int result = write(response_fd, &watch_response_num, sizeof(watch_response_num)); - }; - test_keeper_storage->putRequest(request, session_id, callback, watch_callback); - responses.try_emplace(response_num, promise->get_future()); - responses.try_emplace(watch_response_num, watch_promise->get_future()); - } - else - { - test_keeper_storage->putRequest(request, session_id, callback); - responses.try_emplace(response_num, promise->get_future()); - } - } - else - { - zkutil::ResponseCallback callback = [promise] (const Coordination::ZooKeeperResponsePtr & response) - { - promise->set_value(response); - }; - test_keeper_storage->putRequest(request, session_id, callback); - responses.try_emplace(response_id_counter++, promise->get_future()); - } - - return opnum; + test_keeper_storage_dispatcher->putRequest(request, session_id); + return std::make_pair(opnum, xid); } } diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h index 14e38ae6bd5..e2de33a5156 100644 --- a/src/Server/TestKeeperTCPHandler.h +++ b/src/Server/TestKeeperTCPHandler.h @@ -3,10 +3,11 @@ #include #include "IServer.h" #include +#include #include #include #include -#include +#include #include #include #include @@ -27,15 +28,14 @@ private: IServer & server; Poco::Logger * log; Context global_context; - std::shared_ptr test_keeper_storage; + std::shared_ptr test_keeper_storage_dispatcher; Poco::Timespan operation_timeout; Poco::Timespan session_timeout; int64_t session_id; Stopwatch session_stopwatch; SocketInterruptablePollWrapperPtr poll_wrapper; - - size_t response_id_counter = 0; - std::unordered_map responses; + ConcurrentBoundedQueue responses; + Coordination::XID close_xid = Coordination::CLOSE_XID; /// Streams for reading/writing from/to client connection socket. 
std::shared_ptr in; @@ -46,8 +46,8 @@ private: void sendHandshake(); void receiveHandshake(); - Coordination::OpNum receiveRequest(); - zkutil::TestKeeperStorage::AsyncResponse putCloseRequest(); + std::pair receiveRequest(); + bool finish(); }; } From 3668885f6fd2b3e5d01771edf7656df03227d992 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 19 Jan 2021 17:42:34 +0300 Subject: [PATCH 102/697] Fix translation of multiword types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Заменил выражение "типы с названием из нескольких слов" на "составные типы". --- .../data-types/multiword-types.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 docs/ru/sql-reference/data-types/multiword-types.md diff --git a/docs/ru/sql-reference/data-types/multiword-types.md b/docs/ru/sql-reference/data-types/multiword-types.md new file mode 100644 index 00000000000..4c08ea8ee92 --- /dev/null +++ b/docs/ru/sql-reference/data-types/multiword-types.md @@ -0,0 +1,29 @@ +--- +toc_priority: 61 +toc_title: Составные типы +--- + +# Составные типы {#multiword-types} + +При создании таблиц вы также можете использовать типы данных с названием, состоящим из нескольких слов. Это необходимо для лучшей совместимости с SQL. + +## Поддержка составных типов {#multiword-types-support} + +| Составные типы | Обычные типы | +|-------------------------------------|-----------------------------------------------------------| +| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | +| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | +| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | +| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | +| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | +| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | +| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/data-types/multiword-types/) From ace6d906b0b7a2a8b0f45b95021a618f44b9d21e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 17:45:45 +0300 Subject: [PATCH 103/697] Fix normal close scenario --- src/Common/ZooKeeper/TestKeeperStorage.cpp | 4 ++-- src/Common/ZooKeeper/TestKeeperStorage.h | 4 ++-- src/Server/TestKeeperTCPHandler.cpp | 11 +++++------ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Common/ZooKeeper/TestKeeperStorage.cpp b/src/Common/ZooKeeper/TestKeeperStorage.cpp index 4f1300cde8c..e7300939821 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.cpp +++ b/src/Common/ZooKeeper/TestKeeperStorage.cpp @@ -646,7 +646,7 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const auto response = std::make_shared(); response->xid = zk_request->xid; response->zxid = getZXID(); - results.push_front(ResponseForSession{session_id, response}); + 
results.push_back(ResponseForSession{session_id, response}); } else { @@ -690,7 +690,7 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const response->xid = zk_request->xid; response->zxid = getZXID(); - results.push_front(ResponseForSession{session_id, response}); + results.push_back(ResponseForSession{session_id, response}); } return results; diff --git a/src/Common/ZooKeeper/TestKeeperStorage.h b/src/Common/ZooKeeper/TestKeeperStorage.h index 5afa5032bcf..2196273b3ba 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.h +++ b/src/Common/ZooKeeper/TestKeeperStorage.h @@ -37,7 +37,7 @@ public: Coordination::ZooKeeperResponsePtr response; }; - using ResponsesForSessions = std::deque; + using ResponsesForSessions = std::vector; struct RequestForSession { @@ -45,7 +45,7 @@ public: Coordination::ZooKeeperRequestPtr request; }; - using RequestsForSessions = std::deque; + using RequestsForSessions = std::vector; using Container = std::map; using Ephemerals = std::unordered_map>; diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index e81a2e9ef99..90aec9ce66f 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -298,7 +298,7 @@ void TestKeeperTCPHandler::runImpl() using namespace std::chrono_literals; PollResult result = poll_wrapper->poll(session_timeout); - if (result.has_requests) + if (result.has_requests && !close_received) { do { @@ -308,6 +308,8 @@ void TestKeeperTCPHandler::runImpl() { LOG_DEBUG(log, "Received close event with xid {} for session id #{}", received_xid, session_id); close_xid = received_xid; + close_received = true; + break; } else if (received_op == Coordination::OpNum::Heartbeat) { @@ -325,8 +327,8 @@ void TestKeeperTCPHandler::runImpl() { if (response->xid == close_xid) { - close_received = true; - break; + LOG_DEBUG(log, "Session #{} successfuly closed", session_id); + return; } if (response->error == Coordination::Error::ZOK) @@ -337,9 +339,6 @@ void TestKeeperTCPHandler::runImpl() } } - if (close_received) - break; - if (result.error) throw Exception("Exception happened while reading from socket", ErrorCodes::SYSTEM_ERROR); From f4a718aab946742657a0813e055e581ffb2bbdaf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 17:53:51 +0300 Subject: [PATCH 104/697] Fix tests. --- src/Interpreters/ActionsDAG.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 60443e7656b..4eae60488b9 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1108,7 +1108,11 @@ std::pair ActionsDAG::splitActionsBeforeArrayJoin std::pair ActionsDAG::splitActionsForFilter(const std::string & column_name) const { - auto it = index.find(column_name); + auto it = index.begin(); + for (; it != index.end(); ++it) + if ((*it)->result_name == column_name) + break; + if (it == index.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index for ActionsDAG does not contain filter column name {}. 
DAG:\n{}", From 0ee5629527dc35ffb707bc2ddececfcc082f04bb Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 18:10:49 +0300 Subject: [PATCH 105/697] Fix style --- src/Common/ZooKeeper/TestKeeperStorage.cpp | 1 - src/Common/ya.make | 1 + src/Server/TestKeeperTCPHandler.cpp | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Common/ZooKeeper/TestKeeperStorage.cpp b/src/Common/ZooKeeper/TestKeeperStorage.cpp index e7300939821..e364b0efca9 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.cpp +++ b/src/Common/ZooKeeper/TestKeeperStorage.cpp @@ -14,7 +14,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TIMEOUT_EXCEEDED; extern const int BAD_ARGUMENTS; } diff --git a/src/Common/ya.make b/src/Common/ya.make index 5b5da618bbe..4f2f1892a88 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -85,6 +85,7 @@ SRCS( ZooKeeper/IKeeper.cpp ZooKeeper/TestKeeper.cpp ZooKeeper/TestKeeperStorage.cpp + ZooKeeper/TestKeeperStorageDispatcher.cpp ZooKeeper/ZooKeeper.cpp ZooKeeper/ZooKeeperCommon.cpp ZooKeeper/ZooKeeperConstants.cpp diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index 90aec9ce66f..f928c10c856 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -27,7 +27,6 @@ namespace ErrorCodes { extern const int SYSTEM_ERROR; extern const int UNEXPECTED_PACKET_FROM_CLIENT; - extern const int LOGICAL_ERROR; } struct PollResult From 6c6bf60937e76fcf27433a796b3d4be0c51ecd32 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 18:23:32 +0300 Subject: [PATCH 106/697] Rename function --- src/Interpreters/Context.cpp | 16 ++++++++-------- src/Interpreters/Context.h | 2 +- src/Server/TestKeeperTCPHandler.cpp | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index ea10024b3cb..6cf977dac34 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -305,8 +305,8 @@ struct ContextShared mutable zkutil::ZooKeeperPtr zookeeper; /// Client for ZooKeeper. ConfigurationPtr zookeeper_config; /// Stores zookeeper configs - mutable std::mutex test_keeper_storage_mutex; - mutable std::shared_ptr test_keeper_storage; + mutable std::mutex test_keeper_storage_dispatcher_mutex; + mutable std::shared_ptr test_keeper_storage_dispatcher; mutable std::mutex auxiliary_zookeepers_mutex; mutable std::map auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients. 
ConfigurationPtr auxiliary_zookeepers_config; /// Stores auxiliary zookeepers configs @@ -447,7 +447,7 @@ struct ContextShared /// Stop zookeeper connection zookeeper.reset(); /// Stop test_keeper storage - test_keeper_storage.reset(); + test_keeper_storage_dispatcher.reset(); } bool hasTraceCollector() const @@ -1531,13 +1531,13 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const return shared->zookeeper; } -std::shared_ptr & Context::getTestKeeperStorage() const +std::shared_ptr & Context::getTestKeeperStorageDispatcher() const { - std::lock_guard lock(shared->test_keeper_storage_mutex); - if (!shared->test_keeper_storage) - shared->test_keeper_storage = std::make_shared(); + std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex); + if (!shared->test_keeper_storage_dispatcher) + shared->test_keeper_storage_dispatcher = std::make_shared(); - return shared->test_keeper_storage; + return shared->test_keeper_storage_dispatcher; } zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index dc8efb058e7..9c8d5252373 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -513,7 +513,7 @@ public: std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; - std::shared_ptr & getTestKeeperStorage() const; + std::shared_ptr & getTestKeeperStorageDispatcher() const; /// Set auxiliary zookeepers configuration at server starting or configuration reloading. void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config); diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index f928c10c856..b4192b6c9fb 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -185,7 +185,7 @@ TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::S , server(server_) , log(&Poco::Logger::get("TestKeeperTCPHandler")) , global_context(server.context()) - , test_keeper_storage_dispatcher(global_context.getTestKeeperStorage()) + , test_keeper_storage_dispatcher(global_context.getTestKeeperStorageDispatcher()) , operation_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) , session_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) , session_id(test_keeper_storage_dispatcher->getSessionID()) From b4addbf2c9b86abc5b99381133207c160cd6c732 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 19 Jan 2021 18:24:37 +0300 Subject: [PATCH 107/697] Fix translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поправил перевод. --- docs/en/operations/settings/settings.md | 6 +++--- docs/ru/operations/settings/settings.md | 4 ++-- docs/ru/sql-reference/functions/string-functions.md | 6 +++--- docs/ru/sql-reference/statements/create/table.md | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c6e13451cef..d3a4d50d21c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2530,12 +2530,12 @@ See examples in [UNION](../../sql-reference/statements/select/union.md). 
## data_type_default_nullable {#data_type_default_nullable} -Allows data type without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). +Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). Possible values: -- 1 — The data type in column definition is set to `Nullable` by default. -- 0 — The data type in column definition is set to not `Nullable` by default. +- 1 — The data types in column definitions are set to `Nullable` by default. +- 0 — The data types in column definitions are set to not `Nullable` by default. Default value: `0`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 9118af01b04..fe967944272 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2402,8 +2402,8 @@ WHERE 0 Возможные значения: -- 1 — тип данных в определении столбца задан по умолчанию как `Nullable`. -- 0 — тип данных в определении столбца не задан по умолчанию как `Nullable`. +- 1 — типы данных в определении столбца заданы по умолчанию как `Nullable`. +- 0 — типы данных в определении столбца не заданы по умолчанию как `Nullable`. Значение по умолчанию: `0`. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index cba5c1bc27f..aeb0652cc18 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -557,9 +557,9 @@ SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 ## encodeXMLComponent {#encode-xml-component} -Экранирует символы для размещения строки в текстовом узле XML или атрибуте. +Экранирует символы для размещения строки в текстовом узле или атрибуте XML. -Следующие пять встроенных XML-элементов будут заменены: `<`, `&`, `>`, `"`, `'`. +Экранируются символы, которые в формате XML являются зарезервированными (служебными): `<`, `&`, `>`, `"`, `'`. **Синтаксис** @@ -573,7 +573,7 @@ encodeXMLComponent(x) **Возвращаемое значение** -- Последовательность символов, включая и экранируемые. +- Строка, в которой зарезервированные символы экранированы. Тип: [String](../../sql-reference/data-types/string.md). diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index eb93875d4ee..133d54f2ebd 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -44,9 +44,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../../../engines/table-engines/index.md#table_engines). -## Модификаторы NULL или NOT NULL {#null-modifiers} +## Модификатор NULL или NOT NULL {#null-modifiers} -Модификаторы `NULL` and `NOT NULL` после установления типа данных в определении столбца позволяют или не позволяют ему быть типом [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable). 
+Модификатор `NULL` или `NOT NULL`, указанный после типа данных в определении столбца, позволяет или не позволяет типу данных быть [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable). Если тип не `Nullable` и указан модификатор `NULL`, то столбец будет иметь тип `Nullable`; если `NOT NULL`, то не `Nullable`. Например, `INT NULL` то же, что и `Nullable(INT)`. Если тип `Nullable` и указаны модификаторы `NULL` или `NOT NULL`, то будет вызвано исключение. From 7e71a5b1b0c68f8ededa72c51351440472ca8ad8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 18:25:08 +0300 Subject: [PATCH 108/697] Fix new lines --- src/Common/ZooKeeper/TestKeeperStorage.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/ZooKeeper/TestKeeperStorage.h b/src/Common/ZooKeeper/TestKeeperStorage.h index 2196273b3ba..2df505d3e34 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.h +++ b/src/Common/ZooKeeper/TestKeeperStorage.h @@ -81,8 +81,6 @@ public: { return session_id_counter.fetch_add(1); } - - }; } From df56590e2189459faf16e5e529ab438e72e10830 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 18:35:32 +0300 Subject: [PATCH 109/697] Update test. --- ..._explain_select_with_union_query.reference | 144 +++++++++--------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/tests/queries/0_stateless/01556_explain_select_with_union_query.reference b/tests/queries/0_stateless/01556_explain_select_with_union_query.reference index e4aac5bda16..40c99db429d 100644 --- a/tests/queries/0_stateless/01556_explain_select_with_union_query.reference +++ b/tests/queries/0_stateless/01556_explain_select_with_union_query.reference @@ -1,252 +1,252 @@ Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) 
SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from 
storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression 
(Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Distinct Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) 
SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) Union - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) - Expression (Projection + Before ORDER BY) + Expression ((Projection + Before ORDER BY)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemOne) From 8cdfbd996bbb0c06e72f1c2aec9a20c179b2dd48 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 18:51:52 +0300 Subject: [PATCH 110/697] Fix header --- src/Common/ZooKeeper/TestKeeperStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/TestKeeperStorage.h b/src/Common/ZooKeeper/TestKeeperStorage.h index 2df505d3e34..21b1ce16c32 100644 --- a/src/Common/ZooKeeper/TestKeeperStorage.h +++ b/src/Common/ZooKeeper/TestKeeperStorage.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace zkutil { From 56f19e4790b235337611456cc44e4486df47e970 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 18:52:28 +0300 Subject: [PATCH 111/697] Remove unused headers --- src/Server/TestKeeperTCPHandler.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h index e2de33a5156..2115f1cf11f 100644 --- a/src/Server/TestKeeperTCPHandler.h +++ b/src/Server/TestKeeperTCPHandler.h @@ -11,7 +11,6 @@ #include #include #include -#include namespace DB { From 374cee47e040e977e67b255d1685186cd285ef19 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Jan 2021 19:22:40 +0300 Subject: [PATCH 112/697] Fix typo --- src/Server/TestKeeperTCPHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index b4192b6c9fb..8b9047c531c 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -326,7 +326,7 @@ void TestKeeperTCPHandler::runImpl() { if (response->xid == close_xid) { - LOG_DEBUG(log, "Session #{} successfuly closed", session_id); + LOG_DEBUG(log, "Session #{} successfully closed", session_id); return; } From a69d38649206cd328f894b81e572d48ee7257bec Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Jan 2021 20:09:40 +0300 Subject: [PATCH 113/697] Fix tests. 
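The one-line change below reuses the `description` string captured earlier in `trySplitFilter` instead of re-reading it through `filter_step`. Judging from the surrounding context, `node->step` has already been replaced and `filter_node.step` has already been relabelled with a "[split]" suffix by this point, so asking the old step for its description again is unreliable. A minimal standalone sketch of the capture-before-mutate pattern follows; the `Step` type here is a hypothetical stand-in, not the real `IQueryPlanStep` interface.

``` cpp
// Minimal sketch: copy the description before the step objects are mutated.
#include <iostream>
#include <memory>
#include <string>

struct Step
{
    std::string description;
    void setStepDescription(std::string d) { description = std::move(d); }
    const std::string & getStepDescription() const { return description; }
};

int main()
{
    std::unique_ptr<Step> step = std::make_unique<Step>();
    step->setStepDescription("Filter");

    Step * filter_step = step.get();                              /// raw pointer to the old step
    std::string description = filter_step->getStepDescription();  /// copy taken up front

    step = std::make_unique<Step>();        /// the old object is destroyed; filter_step is stale
    step->setStepDescription(description);  /// safe: uses the saved copy

    std::cout << step->getStepDescription() << '\n';  /// prints "Filter"
}
```

In the sketch, calling `filter_step->getStepDescription()` after the reset would read through a stale pointer, which is exactly what using the saved copy avoids.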
--- src/Processors/QueryPlan/QueryPlan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index d393dbb604f..9de4fc95aa6 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -588,7 +588,7 @@ static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); filter_node.step->setStepDescription("(" + description + ")[split]"); - node->step->setStepDescription(filter_step->getStepDescription()); + node->step->setStepDescription(description); return true; } From 5b7af74f84f14f3a45c1b09851a703767acd48e8 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 19 Jan 2021 20:10:23 +0300 Subject: [PATCH 114/697] Fix multiword types.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Внес небольшие поправки. --- docs/en/sql-reference/data-types/multiword-types.md | 2 +- docs/ru/sql-reference/data-types/multiword-types.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md index f55efcd7a51..5012fbb404e 100644 --- a/docs/en/sql-reference/data-types/multiword-types.md +++ b/docs/en/sql-reference/data-types/multiword-types.md @@ -5,7 +5,7 @@ toc_title: Multiword Type Names # Multiword Types {#multiword-types} -When creating tables, you can also use data types with a name consisting of several words. This is necessary for better SQL compatibility. +When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility. ## Multiword Types Support {#multiword-types-support} diff --git a/docs/ru/sql-reference/data-types/multiword-types.md b/docs/ru/sql-reference/data-types/multiword-types.md index 4c08ea8ee92..559755ef989 100644 --- a/docs/ru/sql-reference/data-types/multiword-types.md +++ b/docs/ru/sql-reference/data-types/multiword-types.md @@ -5,7 +5,7 @@ toc_title: Составные типы # Составные типы {#multiword-types} -При создании таблиц вы также можете использовать типы данных с названием, состоящим из нескольких слов. Это необходимо для лучшей совместимости с SQL. +При создании таблиц вы можете использовать типы данных с названием, состоящим из нескольких слов. Такие названия поддерживаются для лучшей совместимости с SQL. ## Поддержка составных типов {#multiword-types-support} From 9393e96e1b92fb051c9c616d12dbe559a569abd8 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 19 Jan 2021 20:41:47 +0300 Subject: [PATCH 115/697] Update materialize-mysql.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Внес правки в перевод. 
--- .../database-engines/materialize-mysql.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md index e899f453b5f..77e6400ee69 100644 --- a/docs/ru/engines/database-engines/materialize-mysql.md +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -25,8 +25,8 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor ## Виртуальные столбцы {#virtual-columns} -При работе с движком баз данных `MaterializeMySQL` таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) используются с виртуальными столбцами `_sign` и `_version`. - +При работе с движком баз данных `MaterializeMySQL` используются таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) с виртуальными столбцами `_sign` и `_version`. + - `_version` — счетчик транзакций. Тип [UInt64](../../sql-reference/data-types/int-uint.md). - `_sign` — метка удаления. Тип [Int8](../../sql-reference/data-types/int-uint.md). Возможные значения: - `1` — строка не удалена, @@ -51,7 +51,7 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | -Другие типы не поддерживаются. Если таблица MySQL содержит столбец такого типа, ClickHouse выдаст исключение "необработанный тип данных" и остановит репликацию. +Другие типы не поддерживаются. Если таблица MySQL содержит столбец другого типа, ClickHouse выдаст исключение "Неподдерживаемый тип данных" ("Unhandled data type") и остановит репликацию. Тип [Nullable](../../sql-reference/data-types/nullable.md) поддерживается. @@ -59,38 +59,38 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor ### DDL-запросы {#ddl-queries} -DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может спарсить какой-либо DDL-запрос, то он игнорируется. +DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может конвертировать какой-либо DDL-запрос, он его игнорирует. ### Репликация данных {#data-replication} Движок MaterializeMySQL не поддерживает прямые запросы `INSERT`, `DELETE` и `UPDATE`. Однако они поддерживаются с точки зрения репликации данных: -- Запрос `INSERT` в MySQL конвертируется в `INSERT` с `_sign=1`. +- Запрос `INSERT` из MySQL конвертируется в ClickHouse в `INSERT` с `_sign=1`. -- Запрос `DELETE` в MySQL конвертируется в `INSERT` с `_sign=-1`. +- Запрос `DELETE` из MySQL конвертируется в ClickHouse в `INSERT` с `_sign=-1`. -- Запрос `UPDATE` в MySQL конвертируется в `INSERT` с `_sign=-1` и `INSERT` с `_sign=1`. +- Запрос `UPDATE` из MySQL конвертируется в ClickHouse в `INSERT` с `_sign=-1` и `INSERT` с `_sign=1`. 
### Выборка из таблиц движка MaterializeMySQL {#select} Запрос `SELECT` из таблиц движка MaterializeMySQL имеет некоторую специфику: -- Если `_version` не указан в запросе `SELECT`, то используется модификатор [FINAL](../../sql-reference/statements/select/from.md#select-from-final). Таким образом, выбираются только строки с `MAX(_version)`. +- Если в запросе `SELECT` напрямую не указан столбец `_version`, то используется модификатор [FINAL](../../sql-reference/statements/select/from.md#select-from-final). Таким образом, выбираются только строки с `MAX(_version)`. -- Если `_sign` не указан в запросе `SELECT`, то по умолчанию используется `WHERE _sign=1`. Таким образом, удаленные строки не включаются в результирующий набор. +- Если в запросе `SELECT` напрямую не указан столбец `_sign`, то по умолчанию используется `WHERE _sign=1`. Таким образом, удаленные строки не включаются в результирующий набор. -### Индекс конверсии {#index-conversion} +### Конвертация индексов {#index-conversion} Секции `PRIMARY KEY` и `INDEX` в MySQL конвертируются в кортежи `ORDER BY` в таблицах ClickHouse. -ClickHouse имеет только один физический порядок, который определяется секцией `ORDER BY`. Чтобы создать новый физический порядок, используйте [материализованные представления](../../sql-reference/statements/create/view.md#materialized). +В таблицах ClickHouse данные физически хранятся в том порядке, который определяется секцией `ORDER BY`. Чтобы физически перегруппировать данные, используйте [материализованные представления](../../sql-reference/statements/create/view.md#materialized). **Примечание** - Строки с `_sign=-1` физически не удаляются из таблиц. - Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`. - Репликация может быть легко нарушена. -- Операции вручную с базами данных и таблицами запрещены. +- Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены. ## Примеры использования {#examples-of-use} From e4350e078ce1754597b5ace38d05ed5cf4e74b70 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 20 Jan 2021 00:42:31 +0300 Subject: [PATCH 116/697] Add ability to distinguish remote exceptions from local --- src/Client/Connection.cpp | 5 ++++- src/Common/Exception.cpp | 3 ++- src/Common/Exception.h | 7 ++++++- src/IO/ReadHelpers.cpp | 4 ++-- src/IO/ReadHelpers.h | 2 +- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index ef114490c51..084f169bb9f 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -803,6 +803,9 @@ Packet Connection::receivePacket(std::function async_ } catch (Exception & e) { + /// This is to consider ATTEMPT_TO_READ_AFTER_EOF as a remote exception. + e.setRemoteException(); + /// Add server address to exception message, if need. 
if (e.code() != ErrorCodes::UNKNOWN_PACKET_FROM_SERVER) e.addMessage("while receiving packet from " + getDescription()); @@ -892,7 +895,7 @@ void Connection::setDescription() std::unique_ptr Connection::receiveException() { - return std::make_unique(readException(*in, "Received from " + getDescription())); + return std::make_unique(readException(*in, "Received from " + getDescription(), true /* remote */)); } diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index b782471a4e8..231b45a49c6 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -50,8 +50,9 @@ void handle_error_code([[maybe_unused]] const std::string & msg, int code) ErrorCodes::increment(code); } -Exception::Exception(const std::string & msg, int code) +Exception::Exception(const std::string & msg, int code, bool remote_) : Poco::Exception(msg, code) + , remote(remote_) { handle_error_code(msg, code); } diff --git a/src/Common/Exception.h b/src/Common/Exception.h index a6190c7ca24..661d31469fe 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -25,7 +25,7 @@ class Exception : public Poco::Exception { public: Exception() = default; - Exception(const std::string & msg, int code); + Exception(const std::string & msg, int code, bool remote_ = false); Exception(const std::string & msg, const Exception & nested, int code); Exception(int code, const std::string & message) @@ -61,12 +61,17 @@ public: extendedMessage(message); } + /// Used to distinguish local exceptions from the one that was received from remote node. + void setRemoteException(bool remote_ = true) { remote = remote_; } + bool isRemoteException() const { return remote; } + std::string getStackTraceString() const; private: #ifndef STD_EXCEPTION_HAS_STACK_TRACE StackTrace trace; #endif + bool remote = false; const char * className() const throw() override { return "DB::Exception"; } }; diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9cd8747da64..76a722a8ad1 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1014,7 +1014,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field) } -Exception readException(ReadBuffer & buf, const String & additional_message) +Exception readException(ReadBuffer & buf, const String & additional_message, bool remote_exception) { int code = 0; String name; @@ -1041,7 +1041,7 @@ Exception readException(ReadBuffer & buf, const String & additional_message) if (!stack_trace.empty()) out << " Stack trace:\n\n" << stack_trace; - return Exception(out.str(), code); + return Exception(out.str(), code, remote_exception); } void readAndThrowException(ReadBuffer & buf, const String & additional_message) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 56c795324e3..de4e87440a2 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1073,7 +1073,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field); * (type is cut to base class, 'message' replaced by 'displayText', and stack trace is appended to 'message') * Some additional message could be appended to exception (example: you could add information about from where it was received). 
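+  * If 'remote_exception' is true, the returned Exception is additionally marked as received from a remote server (this can be checked later via Exception::isRemoteException()).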
*/ -Exception readException(ReadBuffer & buf, const String & additional_message = ""); +Exception readException(ReadBuffer & buf, const String & additional_message = "", bool remote_exception = false); void readAndThrowException(ReadBuffer & buf, const String & additional_message = ""); From 8a0081639612b2d2221991976615b8cbf882f551 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 19 Jan 2021 22:22:58 +0300 Subject: [PATCH 117/697] Do not mark file for distributed send as broken on EOF - the sender will got ATTEMPT_TO_READ_AFTER_EOF (added in 946c275dfbe901cfec87deecc845f72215350b9d) when the client just go away, i.e. server had been restarted, and this is incorrect to mark the file as broken in this case. - since #18853 the file will be checked on the sender locally, and in case the file was truncated CANNOT_READ_ALL_DATA will be thrown. But before #18853 the sender will not receive ATTEMPT_TO_READ_AFTER_EOF from the client in case of file was truncated on the sender, since the client will just wait for more data, IOW just hang. - and I don't see how ATTEMPT_TO_READ_AFTER_EOF can be received while reading local file. --- src/Storages/Distributed/DirectoryMonitor.cpp | 35 ++++++++++++------- src/Storages/Distributed/DirectoryMonitor.h | 1 - 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 2a29f2559b6..1b0b78ba0d9 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -155,6 +155,27 @@ namespace return header; } + + /// remote_error argument is used to decide whether some errors should be + /// ignored or not, in particular: + /// + /// - ATTEMPT_TO_READ_AFTER_EOF should not be ignored + /// if we receive it from remote (receiver), since: + /// - the sender will got ATTEMPT_TO_READ_AFTER_EOF when the client just go away, + /// i.e. server had been restarted + /// - since #18853 the file will be checked on the sender locally, and + /// if there is something wrong with the file itself, we will receive + /// ATTEMPT_TO_READ_AFTER_EOF not from the remote at first + /// and mark batch as broken. 
+ bool isFileBrokenErrorCode(int code, bool remote_error) + { + return code == ErrorCodes::CHECKSUM_DOESNT_MATCH + || code == ErrorCodes::TOO_LARGE_SIZE_COMPRESSED + || code == ErrorCodes::CANNOT_READ_ALL_DATA + || code == ErrorCodes::UNKNOWN_CODEC + || code == ErrorCodes::CANNOT_DECOMPRESS + || (!remote_error && code == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); + } } @@ -571,7 +592,7 @@ struct StorageDistributedDirectoryMonitor::Batch } catch (const Exception & e) { - if (isFileBrokenErrorCode(e.code())) + if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) { tryLogCurrentException(parent.log, "Failed to send batch due to"); batch_broken = true; @@ -801,16 +822,6 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map } } -bool StorageDistributedDirectoryMonitor::isFileBrokenErrorCode(int code) -{ - return code == ErrorCodes::CHECKSUM_DOESNT_MATCH - || code == ErrorCodes::TOO_LARGE_SIZE_COMPRESSED - || code == ErrorCodes::CANNOT_READ_ALL_DATA - || code == ErrorCodes::UNKNOWN_CODEC - || code == ErrorCodes::CANNOT_DECOMPRESS - || code == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF; -} - void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_path) const { const auto last_path_separator_pos = file_path.rfind('/'); @@ -837,7 +848,7 @@ void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_p bool StorageDistributedDirectoryMonitor::maybeMarkAsBroken(const std::string & file_path, const Exception & e) const { /// mark file as broken if necessary - if (isFileBrokenErrorCode(e.code())) + if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) { markAsBroken(file_path); return true; diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index bc897136786..c73b79761ca 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -70,7 +70,6 @@ private: void processFile(const std::string & file_path); void processFilesWithBatching(const std::map & files); - static bool isFileBrokenErrorCode(int code); void markAsBroken(const std::string & file_path) const; bool maybeMarkAsBroken(const std::string & file_path, const Exception & e) const; From 701b078866be7ae3f08e4c5b5ea2157017466d3a Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Wed, 20 Jan 2021 01:39:12 +0300 Subject: [PATCH 118/697] First draft --- docs/en/sql-reference/table-functions/file.md | 32 ++++++-- .../sql-reference/table-functions/remote.md | 74 ++++++++++++------- docs/en/sql-reference/table-functions/url.md | 42 ++++++++--- 3 files changed, 106 insertions(+), 42 deletions(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index beab691ad0e..2ba55c1aa90 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -5,7 +5,11 @@ toc_title: file # file {#file} -Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. +Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. + +`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables. 
+ +**Syntax** ``` sql file(path, format, structure) @@ -21,7 +25,7 @@ file(path, format, structure) A table with the specified structure for reading or writing data in the specified file. -**Example** +**Examples** Setting `user_files_path` and the contents of the file `test.csv`: @@ -39,8 +43,8 @@ Table from`test.csv` and selection of the first two rows from it: ``` sql SELECT * -FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2 +FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); +LIMIT 2; ``` ``` text @@ -52,9 +56,25 @@ LIMIT 2 ``` sql -- getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file -SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10 +SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; ``` +Inserting data from a file into a table: + +``` sql +CREATE TABLE file_engine_table (column1 UInt32, column2 UInt32, column3 UInt32) ENGINE=File(CSV); +INSERT INTO file_engine_table FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); +SELECT * FROM file_engine_table; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + + **Globs in path** Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). @@ -116,4 +136,4 @@ FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32') - [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/file/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/file/) diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 71b1006fc5d..ef74cf8ca7a 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -5,9 +5,11 @@ toc_title: remote # remote, remoteSecure {#remote-remotesecure} -Allows you to access remote servers without creating a `Distributed` table. +Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with secured connection. -Signatures: +Both functions can be used in `SELECT` and `INSERT` queries. + +**Syntax** ``` sql remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) @@ -16,13 +18,39 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) ``` -`addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The port is the TCP port on the remote server. If the port is omitted, it uses `tcp_port` from the server’s config file (by default, 9000). -`sharding_key` - We can specify sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. +**Input parameters** -!!! important "Important" - The port is required for an IPv6 address. 
+- `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. + + The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. + + The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440). -Examples: + The port is required for an IPv6 address. + + Type: [String](../../sql-reference/data-types/string.md). +- `db` - Database name. Type: [String](../../sql-reference/data-types/string.md). +- `table` - Table name. Type: [String](../../sql-reference/data-types/string.md). +- `user` - User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md). +- `password` - User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md). +- `sharding_key` - Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +Dataset from remote servers. + +**Usage** + +Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don’t use the `remote` table function. + +The `remote` table function can be useful in the following cases: + +- Accessing a specific server for data comparison, debugging, and testing. +- Queries between various ClickHouse clusters for research purposes. +- Infrequent distributed requests that are made manually. +- Distributed requests where the set of servers is re-defined each time. + +**Adresses** ``` text example01-01-1 @@ -33,9 +61,7 @@ localhost [2a02:6b8:0:1111::11]:9000 ``` -Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). - -Example: +Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). Example: ``` text example01-01-1,example01-02-1 @@ -55,30 +81,28 @@ example01-{01..02}-1 If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets. -Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting. - -Example: +Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). 
In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting. This example specifies two shards that each have two replicas: ``` text example01-{01..02}-{1|2} ``` -This example specifies two shards that each have two replicas. - The number of addresses generated is limited by a constant. Right now this is 1000 addresses. -Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case, the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don’t use the `remote` table function. +**Examples** -The `remote` table function can be useful in the following cases: +Selecting data from a remote server: -- Accessing a specific server for data comparison, debugging, and testing. -- Queries between various ClickHouse clusters for research purposes. -- Infrequent distributed requests that are made manually. -- Distributed requests where the set of servers is re-defined each time. +``` sql +SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3; +``` -If the user is not specified, `default` is used. -If the password is not specified, an empty password is used. +Inserting data from a remote server into a table: -`remoteSecure` - same as `remote` but with secured connection. Default port — [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) from config or 9440. +``` sql +CREATE TABLE remote_engine_table (name String, value UInt32) ENGINE=Distributed(cluster, default, hits); +INSERT INTO remote_engine_table FROM remote('127.0.0.1', db.remote_engine_table); +SELECT * FROM remote_engine_table; +``` -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/remote/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/remote/) diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index ea649e01994..0b091224cf4 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -5,20 +5,40 @@ toc_title: url # url {#url} -`url(URL, format, structure)` - returns a table created from the `URL` with given -`format` and `structure`. +`url` function creates a table from the `URL` with given `format` and `structure`. -URL - HTTP or HTTPS server address, which can accept `GET` and/or `POST` requests. +`url` function can be used in `SELECT` and `INSERT` queries on data in [URL](../../engines/table-engines/special/url.md) tables. -format - [format](../../interfaces/formats.md#formats) of the data. - -structure - table structure in `'UserID UInt64, Name String'` format. Determines column names and types. - -**Example** +**Syntax** ``` sql --- getting the first 3 lines of a table that contains columns of String and UInt32 type from HTTP-server which answers in CSV format. 
-SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3 +url(URL, format, structure) ``` -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/url/) +**Input parameters** + +- `URL` - HTTP or HTTPS server address, which can accept `GET` and/or `POST` requests. Type: [String](../../sql-reference/data-types/string.md). +- `format` - [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md). +- `structure` - Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). + +**Returned value** + +A table with the specified format and structure and with data from the defined URL. + +**Examples** + +Getting the first 3 lines of a table that contains columns of `String` and `UInt32` type from HTTP-server which answers in `CSV` format. + +``` sql +SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3; +``` + +Inserting data from a URL into a table: + +``` sql +CREATE TABLE url_engine_table (column1 String, column2 UInt32) ENGINE=URL('http://127.0.0.1:12345/', CSV); +INSERT INTO url_engine_table FROM url('http://127.0.0.1:12345/', 'CSV', 'column1 String, column2 UInt32'); +SELECT * FROM url_engine_table; +``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/url/) From 2a511b169bb64f340e7e6770a656d96051d9ede5 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Wed, 20 Jan 2021 02:02:46 +0300 Subject: [PATCH 119/697] Fixes --- docs/en/sql-reference/table-functions/file.md | 41 ++++++++----------- .../sql-reference/table-functions/remote.md | 9 ++-- docs/en/sql-reference/table-functions/url.md | 2 +- 3 files changed, 23 insertions(+), 29 deletions(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 2ba55c1aa90..2dd12a8b44b 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -17,9 +17,9 @@ file(path, format, structure) **Input parameters** -- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings. +- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. - `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. 
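+
+For example, assuming hypothetical files `data_1.csv`, `data_2.csv` and `data_3.csv` under `user_files_path`, a single call can combine the glob form of `path` with an explicit `structure` string:
+
+``` sql
+-- data_{1..3}.csv are hypothetical files under user_files_path; columns are declared explicitly.
+SELECT column1, column2
+FROM file('data_{1..3}.csv', 'CSV', 'column1 UInt32, column2 UInt32');
+```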
**Returned value** @@ -39,7 +39,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv 78,43,45 ``` -Table from`test.csv` and selection of the first two rows from it: +Table from `test.csv` and selection of the first two rows from it: ``` sql SELECT * @@ -53,9 +53,9 @@ LIMIT 2; │ 3 │ 2 │ 1 │ └─────────┴─────────┴─────────┘ ``` +Getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file: ``` sql --- getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; ``` @@ -75,7 +75,7 @@ SELECT * FROM file_engine_table; ``` -**Globs in path** +## Globs in Path {#globs-in-path} Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). @@ -88,31 +88,25 @@ Constructions with `{}` are similar to the [remote table function](../../sql-ref **Example** -1. Suppose we have several files with the following relative paths: +Suppose we have several files with the following relative paths: -- ‘some_dir/some_file_1’ -- ‘some_dir/some_file_2’ -- ‘some_dir/some_file_3’ -- ‘another_dir/some_file_1’ -- ‘another_dir/some_file_2’ -- ‘another_dir/some_file_3’ +- 'some_dir/some_file_1' +- 'some_dir/some_file_2' +- 'some_dir/some_file_3' +- 'another_dir/some_file_1' +- 'another_dir/some_file_2' +- 'another_dir/some_file_3' -1. Query the amount of rows in these files: - - +Query the amount of rows in these files: ``` sql -SELECT count(*) -FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32') +SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32'); ``` -1. Query the amount of rows in all files of these two directories: - - +Query the amount of rows in all files of these two directories: ``` sql -SELECT count(*) -FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') +SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` !!! warning "Warning" @@ -123,8 +117,7 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') Query the data from files named `file000`, `file001`, … , `file999`: ``` sql -SELECT count(*) -FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32') +SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); ``` ## Virtual Columns {#virtual-columns} diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index ef74cf8ca7a..5dc915a8522 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -22,13 +22,14 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) - `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. - The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. + The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. - The port is the TCP port on the remote server. 
If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440). + The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440). - The port is required for an IPv6 address. + The port is required for an IPv6 address. + + Type: [String](../../sql-reference/data-types/string.md). - Type: [String](../../sql-reference/data-types/string.md). - `db` - Database name. Type: [String](../../sql-reference/data-types/string.md). - `table` - Table name. Type: [String](../../sql-reference/data-types/string.md). - `user` - User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 0b091224cf4..1ef878f1074 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -7,7 +7,7 @@ toc_title: url `url` function creates a table from the `URL` with given `format` and `structure`. -`url` function can be used in `SELECT` and `INSERT` queries on data in [URL](../../engines/table-engines/special/url.md) tables. +`url` function may be used in `SELECT` and `INSERT` queries on data in [URL](../../engines/table-engines/special/url.md) tables. **Syntax** From 99d6676857155935b67ed3adad7f1549ab6e3deb Mon Sep 17 00:00:00 2001 From: jianmei zhang Date: Wed, 20 Jan 2021 11:32:39 +0800 Subject: [PATCH 120/697] update clickhouse-local docs to latest --- .../operations/utilities/clickhouse-local.md | 17 +++++++++++----- .../operations/utilities/clickhouse-local.md | 20 ++++++++++++++----- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index f93ba139cae..04f9f3660b5 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -16,7 +16,7 @@ By default `clickhouse-local` does not have access to data on the same host, but !!! warning "Warning" It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error. -For temporary data, a unique temporary data directory is created by default. If you want to override this behavior, the data directory can be explicitly specified with the `-- --path` option. +For temporary data, a unique temporary data directory is created by default. ## Usage {#usage} @@ -32,15 +32,22 @@ Arguments: - `-S`, `--structure` — table structure for input data. - `-if`, `--input-format` — input format, `TSV` by default. - `-f`, `--file` — path to data, `stdin` by default. -- `-q` `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option. -- `-qf` `--queries-file` - file path with queries to execute. 
You must specify either `query` or `queries-file` option. +- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option. +- `-qf`, `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option. - `-N`, `--table` — table name where to put output data, `table` by default. - `-of`, `--format`, `--output-format` — output format, `TSV` by default. +- `-d`, `--database` — default database, `_local` by default. - `--stacktrace` — whether to dump debug output in case of exception. +- `--echo` — print query before execution. - `--verbose` — more details on query execution. -- `-s` — disables `stderr` logging. -- `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty. +- `--logger.console` — Log to console. +- `--logger.log` — Log file name. +- `--logger.level` — Log level. +- `--ignore-error` — do not stop processing if a query failed. +- `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty. +- `--no-system-tables` — do not attach system tables. - `--help` — arguments references for `clickhouse-local`. +- `-V`, `--version` — print version information and exit. Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`. diff --git a/docs/zh/operations/utilities/clickhouse-local.md b/docs/zh/operations/utilities/clickhouse-local.md index 3ff38c01651..9b0ae841cd6 100644 --- a/docs/zh/operations/utilities/clickhouse-local.md +++ b/docs/zh/operations/utilities/clickhouse-local.md @@ -3,11 +3,11 @@ toc_priority: 60 toc_title: clickhouse-local --- -# ClickHouse Local {#clickhouse-local} +# clickhouse-local {#clickhouse-local} `clickhouse-local`模式可以使您能够对本地文件执行快速处理,而无需部署和配置ClickHouse服务器。 -[ClickHouse SQL语法](../../operations/utilities/clickhouse-local.md)支持对表格数据的查询. +接受表示表格tables的数据,并使用[ClickHouse SQL方言](../../operations/utilities/clickhouse-local.md)查询它们。 `clickhouse-local`使用与ClickHouse Server相同的核心,因此它支持大多数功能以及相同的格式和表引擎。 @@ -16,6 +16,8 @@ toc_title: clickhouse-local !!! warning "警告" 不建议将生产服务器配置加载到`clickhouse-local`因为数据可以在人为错误的情况下被损坏。 +对于临时数据,默认情况下会创建一个唯一的临时数据目录。 + ## 用途 {#usage} 基本用法: @@ -29,14 +31,22 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin - `-S`, `--structure` — 输入数据的表结构。 - `-if`, `--input-format` — 输入格式化类型, 默认是`TSV`。 - `-f`, `--file` — 数据路径, 默认是`stdin`。 -- `-q` `--query` — 要查询的SQL语句使用`;`做分隔符。 +- `-q`, `--query` — 要查询的SQL语句使用`;`做分隔符。您必须指定`query`或`queries-file`选项。 +- `-qf`, `--queries-file` - 包含执行查询的文件路径。您必须指定`query`或`queries-file`选项。 - `-N`, `--table` — 数据输出的表名,默认是`table`。 - `-of`, `--format`, `--output-format` — 输出格式化类型, 默认是`TSV`。 +- `-d`, `--database` — 默认数据库名,默认是`_local`。 - `--stacktrace` — 是否在出现异常时输出栈信息。 +- `--echo` — 执行前打印查询。 - `--verbose` — debug显示查询的详细信息。 -- `-s` — 禁用`stderr`输出信息。 -- `--config-file` — 与ClickHouse服务器格式相同配置文件的路径,默认情况下配置为空。 +- `--logger.console` — 日志显示到控制台。 +- `--logger.log` — 日志文件名。 +- `--logger.level` — 日志级别。 +- `--ignore-error` — 当查询失败时,不停止处理。 +- `-c`, `--config-file` — 与ClickHouse服务器格式相同配置文件的路径,默认情况下配置为空。 +- `--no-system-tables` — 不附加系统表。 - `--help` — `clickhouse-local`使用帮助信息。 +- `-V`, `--version` — 打印版本信息并退出。 对于每个ClickHouse配置的参数,也可以单独使用,可以不使用`--config-file`指定。 From 67d880dc3a74c9fb4b135d1a8cff2a85872175a1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Jan 2021 12:38:56 +0300 Subject: [PATCH 121/697] Update test. 
--- .../01576_alias_column_rewrite.reference | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index ebc8be4f79b..679695dd6db 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -26,13 +26,13 @@ Expression (Projection) MergingSorted (Merge sorted streams for ORDER BY) MergeSorting (Merge sorted blocks for ORDER BY) PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY + Add table aliases) + Expression ((Before ORDER BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (MergeTree) Expression (Projection) Limit (preliminary LIMIT) FinishSorting - Expression (Before ORDER BY + Add table aliases) + Expression ((Before ORDER BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union ReadFromStorage (MergeTree with order) @@ -48,20 +48,20 @@ Expression (Projection) ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) optimize_aggregation_in_order -Expression (Projection + Before ORDER BY) +Expression ((Projection + Before ORDER BY)) Aggregating - Expression (Before GROUP BY + Add table aliases) + Expression ((Before GROUP BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (MergeTree) -Expression (Projection + Before ORDER BY) +Expression ((Projection + Before ORDER BY)) Aggregating - Expression (Before GROUP BY + Add table aliases) + Expression ((Before GROUP BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) -Expression (Projection + Before ORDER BY) +Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) From fa3964d36d69062eb091dd7ac3ce877c7c1b91e8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Jan 2021 12:42:55 +0300 Subject: [PATCH 122/697] Fix test. --- src/Processors/QueryPlan/QueryPlan.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 9de4fc95aa6..0e16d97d436 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -565,6 +565,11 @@ static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) return false; const auto & expr = filter_step->getExpression(); + + /// Do not split if there are function like runningDifference. 
+ if (expr->hasStatefulFunctions()) + return false; + auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName()); if (split.second->empty()) From 6847fdf7722cb4d50b57a73708190e991521140e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 20 Jan 2021 14:12:33 +0300 Subject: [PATCH 123/697] fix --- base/harmful/harmful.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 4032fbf3b90..bfb68abbcfb 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -118,7 +118,9 @@ TRAP(logout) TRAP(logwtmp) TRAP(lrand48) TRAP(mallinfo) -//TRAP(mallopt) // Used by tsan +#if !defined(SANITIZER) +TRAP(mallopt) // Used by tsan +#endif TRAP(mblen) TRAP(mbrlen) TRAP(mbrtowc) @@ -193,7 +195,9 @@ TRAP(dbm_nextkey) TRAP(dbm_open) TRAP(dbm_store) TRAP(dirname) -//TRAP(dlerror) // Used by tsan +#if !defined(SANITIZER) +TRAP(dlerror) // Used by tsan +#endif TRAP(ftw) TRAP(getc_unlocked) //TRAP(getenv) // Ok at program startup From 70679e4ee1964693d419a35d23bc2bdbdf8fcbb6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Jan 2021 15:11:26 +0300 Subject: [PATCH 124/697] Fix test keeper handler --- src/Server/TestKeeperTCPHandler.cpp | 70 +++++++++++++++++------------ src/Server/TestKeeperTCPHandler.h | 9 ++-- 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index 8b9047c531c..7b02996019e 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #ifdef POCO_HAVE_FD_EPOLL #include @@ -36,6 +38,36 @@ struct PollResult bool error; }; +/// Queue with mutex. As simple as possible. +class ThreadSafeResponseQueue +{ +private: + mutable std::mutex queue_mutex; + std::queue queue; +public: + void push(const Coordination::ZooKeeperResponsePtr & response) + { + std::lock_guard lock(queue_mutex); + queue.push(response); + } + bool tryPop(Coordination::ZooKeeperResponsePtr & response) + { + std::lock_guard lock(queue_mutex); + if (!queue.empty()) + { + response = queue.front(); + queue.pop(); + return true; + } + return false; + } + size_t size() const + { + std::lock_guard lock(queue_mutex); + return queue.size(); + } +}; + struct SocketInterruptablePollWrapper { int sockfd; @@ -159,12 +191,10 @@ struct SocketInterruptablePollWrapper result.has_requests = true; else { - do - { - UInt8 response_byte; - readIntBinary(response_byte, response_in); - result.has_responses = true; - } while (response_in.available()); /// Just to drain all of them + /// Skip all of them, we are not interested in exact + /// amount because responses ordered in responses queue. 
+ response_in.ignore(); + result.has_responses = true; } } } @@ -190,7 +220,7 @@ TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::S , session_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) , session_id(test_keeper_storage_dispatcher->getSessionID()) , poll_wrapper(std::make_unique(socket_)) - , responses(1000) + , responses(std::make_unique()) { } @@ -282,7 +312,7 @@ void TestKeeperTCPHandler::runImpl() auto response_fd = poll_wrapper->getResponseFD(); auto response_callback = [this, response_fd] (const Coordination::ZooKeeperResponsePtr & response) { - responses.push(response); + responses->push(response); UInt8 single_byte = 1; [[maybe_unused]] int result = write(response_fd, &single_byte, sizeof(single_byte)); }; @@ -322,7 +352,7 @@ void TestKeeperTCPHandler::runImpl() if (result.has_responses) { Coordination::ZooKeeperResponsePtr response; - while (responses.tryPop(response)) + while (responses->tryPop(response)) { if (response->xid == close_xid) { @@ -344,8 +374,7 @@ void TestKeeperTCPHandler::runImpl() if (session_stopwatch.elapsedMicroseconds() > static_cast(session_timeout.totalMicroseconds())) { LOG_DEBUG(log, "Session #{} expired", session_id); - if (!finish()) - LOG_DEBUG(log, "Cannot sent close for expired session #{}", session_id); + finish(); break; } } @@ -353,30 +382,15 @@ void TestKeeperTCPHandler::runImpl() catch (const Exception & ex) { LOG_INFO(log, "Got exception processing session #{}: {}", session_id, getExceptionMessage(ex, true)); - if (!finish()) - LOG_DEBUG(log, "Cannot sent close for session #{}", session_id); + finish(); } - } -bool TestKeeperTCPHandler::finish() +void TestKeeperTCPHandler::finish() { Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = close_xid; test_keeper_storage_dispatcher->putRequest(request, session_id); - - Coordination::ZooKeeperResponsePtr response; - bool finished = false; - while (responses.tryPop(response, operation_timeout.totalMilliseconds())) - { - if (response->xid == close_xid) - { - finished = true; - response->write(*out); - break; - } - } - return finished; } std::pair TestKeeperTCPHandler::receiveRequest() diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h index 2115f1cf11f..46b4454b319 100644 --- a/src/Server/TestKeeperTCPHandler.h +++ b/src/Server/TestKeeperTCPHandler.h @@ -3,7 +3,6 @@ #include #include "IServer.h" #include -#include #include #include #include @@ -17,6 +16,8 @@ namespace DB struct SocketInterruptablePollWrapper; using SocketInterruptablePollWrapperPtr = std::unique_ptr; +class ThreadSafeResponseQueue; +using ThreadSafeResponseQueuePtr = std::unique_ptr; class TestKeeperTCPHandler : public Poco::Net::TCPServerConnection { @@ -33,7 +34,9 @@ private: int64_t session_id; Stopwatch session_stopwatch; SocketInterruptablePollWrapperPtr poll_wrapper; - ConcurrentBoundedQueue responses; + + ThreadSafeResponseQueuePtr responses; + Coordination::XID close_xid = Coordination::CLOSE_XID; /// Streams for reading/writing from/to client connection socket. 
@@ -46,7 +49,7 @@ private: void receiveHandshake(); std::pair receiveRequest(); - bool finish(); + void finish(); }; } From cf888740887917bb5de3e84d0f01dcbbe9890ee8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 20 Jan 2021 15:30:41 +0300 Subject: [PATCH 125/697] done --- src/Interpreters/Context.cpp | 2 +- src/Interpreters/Context.h | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2a8fdce869b..abd39cc3d4b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -946,7 +946,7 @@ bool Context::hasScalar(const String & name) const void Context::addQueryAccessInfo(const String & quoted_database_name, const String & full_quoted_table_name, const Names & column_names) { assert(global_context != this || getApplicationType() == ApplicationType::LOCAL); - auto lock = getLock(); + std::lock_guard lock(query_access_info.mutex); query_access_info.databases.emplace(quoted_database_name); query_access_info.tables.emplace(full_quoted_table_name); for (const auto & column_name : column_names) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 79140f0d209..1bd901d40c6 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -194,9 +194,36 @@ private: /// Record entities accessed by current query, and store this information in system.query_log. struct QueryAccessInfo { - std::set databases; - std::set tables; - std::set columns; + QueryAccessInfo() = default; + + QueryAccessInfo(const QueryAccessInfo & rhs) + { + std::lock_guard lock(rhs.mutex); + databases = rhs.databases; + tables = rhs.tables; + columns = rhs.columns; + } + + QueryAccessInfo(QueryAccessInfo && rhs) = delete; + + QueryAccessInfo & operator=(QueryAccessInfo rhs) + { + swap(rhs); + return *this; + } + + void swap(QueryAccessInfo & rhs) + { + std::swap(databases, rhs.databases); + std::swap(tables, rhs.tables); + std::swap(columns, rhs.columns); + } + + /// To prevent a race between copy-constructor and other uses of this structure. + mutable std::mutex mutex{}; + std::set databases{}; + std::set tables{}; + std::set columns{}; }; QueryAccessInfo query_access_info; From 1d1e53f5d3c68f82a5491e15a972983c6f7f8add Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 20 Jan 2021 15:37:53 +0300 Subject: [PATCH 126/697] style --- src/Interpreters/Context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1bd901d40c6..a74875376ec 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -212,7 +212,7 @@ private: return *this; } - void swap(QueryAccessInfo & rhs) + void swap(QueryAccessInfo & rhs) { std::swap(databases, rhs.databases); std::swap(tables, rhs.tables); From d0fc7098a7f72ab291bf95d8fd14920054ce56a6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Jan 2021 15:58:23 +0300 Subject: [PATCH 127/697] Fix tests. 
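
The updated reference reflects the new combined step descriptions (e.g. `Expression ((Projection + Before ORDER BY))`).
A sketch of a query that yields a plan of this shape (exact step names may vary between builds):

``` sql
-- Minimal query whose plan header shows the projected column x and the dummy column of system.one.
EXPLAIN header = 1 SELECT 1 AS x;
```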
--- tests/queries/0_stateless/01508_explain_header.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01508_explain_header.reference b/tests/queries/0_stateless/01508_explain_header.reference index 5f9e8cfed84..4bfbe1c818b 100644 --- a/tests/queries/0_stateless/01508_explain_header.reference +++ b/tests/queries/0_stateless/01508_explain_header.reference @@ -1,4 +1,4 @@ -Expression (Projection + Before ORDER BY) +Expression ((Projection + Before ORDER BY)) Header: x UInt8 SettingQuotaAndLimits (Set limits and quota after reading from storage) Header: dummy UInt8 From 6cf4ed5c42270385248f6dfd44604319082a1eb4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Jan 2021 16:18:41 +0300 Subject: [PATCH 128/697] Refactor now64 --- src/Functions/now64.cpp | 81 +++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index fb6f7a0366d..ac418312698 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int CANNOT_CLOCK_GETTIME; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace @@ -44,29 +45,77 @@ Field nowSubsecond(UInt32 scale) scale); } -class FunctionNow64 : public IFunction +/// Get the current time. (It is a constant, it is evaluated once for the entire query.) +class ExecutableFunctionNow64 : public IExecutableFunctionImpl +{ +public: + explicit ExecutableFunctionNow64(Field time_) : time_value(time_) {} + + String getName() const override { return "now64"; } + + ColumnPtr execute(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return result_type->createColumnConst(input_rows_count, time_value); + } + +private: + Field time_value; +}; + +class FunctionBaseNow64 : public IFunctionBaseImpl +{ +public: + explicit FunctionBaseNow64(Field time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {} + + String getName() const override { return "now64"; } + + const DataTypes & getArgumentTypes() const override + { + static const DataTypes argument_types; + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(time_value); + } + + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } + +private: + Field time_value; + DataTypePtr return_type; +}; + +class Now64OverloadResolver : public IFunctionOverloadResolverImpl { public: static constexpr auto name = "now64"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return ColumnNumbers{0}; } bool isDeterministic() const override { return false; } - // Return type depends on argument value. 
- DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique(); } + + DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override { UInt32 scale = DataTypeDateTime64::default_scale; - // Type check is similar to the validateArgumentType, trying to keep error codes and messages as close to the said function as possible. - if (!arguments.empty()) + if (arguments.size() > 1) + { + throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + if (arguments.size() == 1) { const auto & argument = arguments[0]; if (!isInteger(argument.type) || !argument.column || !isColumnConst(*argument.column)) @@ -82,10 +131,10 @@ public: return std::make_shared(scale); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + FunctionBaseImplPtr build(const ColumnsWithTypeAndName &, const DataTypePtr & result_type) const override { const UInt32 scale = assert_cast(result_type.get())->getScale(); - return result_type->createColumnConst(input_rows_count, nowSubsecond(scale)); + return std::make_unique(nowSubsecond(scale), result_type); } }; @@ -93,7 +142,7 @@ public: void registerFunctionNow64(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(FunctionFactory::CaseInsensitive); } } From ac438f8ee7cedb85fef4a9888fe506e3d25e87c2 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 20 Jan 2021 17:31:56 +0300 Subject: [PATCH 129/697] add Sanitizer report issue template --- .github/ISSUE_TEMPLATE/95_sanitizer-report.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/95_sanitizer-report.md diff --git a/.github/ISSUE_TEMPLATE/95_sanitizer-report.md b/.github/ISSUE_TEMPLATE/95_sanitizer-report.md new file mode 100644 index 00000000000..4dfcf17c60f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/95_sanitizer-report.md @@ -0,0 +1,19 @@ +--- +name: Sanitizer alert +about: Potential issue has been found by special code instrumentation +title: '' +labels: testing +assignees: '' + +--- + +(you don't have to strictly follow this form) + +**Describe the bug** +A link to the report + +**How to reproduce** +Try to reproduce the report and copy the tables and queries involved. + +**Error message and/or stacktrace** +You can find additional information in server logs. From 6458d210408e3e3e13fd15556f9c5830f755bb1c Mon Sep 17 00:00:00 2001 From: George Date: Wed, 20 Jan 2021 18:38:35 +0300 Subject: [PATCH 130/697] various fixes --- .../utilities/clickhouse-benchmark.md | 4 +-- .../utilities/clickhouse-benchmark.md | 28 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 49c18b02e2d..92a96f8cd6e 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -40,10 +40,10 @@ clickhouse-benchmark [keys] < queries_file; ## Keys {#clickhouse-benchmark-keys} -- `--query=WORD` — Query to execute. 
If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. +- `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. -- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. +- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. - `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. - `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). - `-r`, `--randomize` — Random order of queries execution if there is more than one input query. diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 4579418b63a..2a883cf3bb5 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -5,7 +5,7 @@ toc_title: clickhouse-benchmark # clickhouse-benchmark {#clickhouse-benchmark} -Устанавливает соединение с сервером ClickHouse и неоднократно посылает указанные запросы. +Устанавливает соединение с сервером ClickHouse и запускает циклическое выполнение указанных запросов. **Синтаксис** @@ -25,7 +25,7 @@ $ echo "single query" | clickhouse-benchmark [keys] $ clickhouse-benchmark [keys] <<< "single query" ``` -Если нужно послать набор запросов, создайте текстовый файл и расположите каждый запрос на отдельной строке в файле. Например: +Если нужно выполнить набор запросов, создайте текстовый файл и расположите каждый запрос на отдельной строке в файле. Например: ``` sql SELECT * FROM system.numbers LIMIT 10000000; @@ -40,22 +40,22 @@ clickhouse-benchmark [keys] < queries_file; ## Ключи {#clickhouse-benchmark-keys} -- `--query=WORD` — запрос для исполнения. Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. +- `--query=QUERY` — исполняемый запрос. Если параметр не передан, `clickhouse-benchmark` будет считывать запросы из стандартного ввода. - `-c N`, `--concurrency=N` — количество запросов, которые `clickhouse-benchmark` отправляет одновременно. Значение по умолчанию: 1. -- `-d N`, `--delay=N` — интервал в секундах между промежуточными сообщениями (чтобы отключить сообщения, установите 0). Значение по умолчанию: 1. -- `-h WORD`, `--host=WORD` — хост сервера. Значение по умолчанию: `localhost`. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. -- `-p N`, `--port=N` — порт сервера. Значение по умолчанию: 9000. Для [сравнительного режима](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. +- `-d N`, `--delay=N` — интервал в секундах между промежуточными отчетами (чтобы отключить отчеты, установите 0). Значение по умолчанию: 1. +- `-h HOST`, `--host=HOST` — хост сервера. Значение по умолчанию: `localhost`. Для [режима сравнения](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-h` ключей. +- `-p N`, `--port=N` — порт сервера. 
Значение по умолчанию: 9000. Для [режима сравнения](#clickhouse-benchmark-comparison-mode) можно использовать несколько `-p` ключей. - `-i N`, `--iterations=N` — общее число запросов. Значение по умолчанию: 0 (вечно будет повторяться). -- `-r`, `--randomize` — случайный порядок выполнения запросов при наличии более одного входного запроса. +- `-r`, `--randomize` — использовать случайный порядок выполнения запросов при наличии более одного входного запроса. - `-s`, `--secure` — используется `TLS` соединение. - `-t N`, `--timelimit=N` — лимит по времени в секундах. `clickhouse-benchmark` перестает отправлять запросы при достижении лимита по времени. Значение по умолчанию: 0 (лимит отключен). -- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: 5. В [сравнительном режиме](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. -- `--cumulative` — выводит совокупность данных, а не данные за интервал. +- `--confidence=N` — уровень доверия для T-критерия. Возможные значения: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: 5. В [режиме сравнения](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверяет [двухвыборочный t-критерий Стьюдента для независимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне доверия. +- `--cumulative` — выводить статистику за все время работы, а не за последний временной интервал. - `--database=DATABASE_NAME` — имя базы данных ClickHouse. Значение по умолчанию: `default`. -- `--json=FILEPATH` — формат вывода `JSON`. Когда этот ключ указан, `clickhouse-benchmark` выводит сообщение в указанный JSON-файл. +- `--json=FILEPATH` — дополнительный вывод в формате `JSON`. Когда этот ключ указан, `clickhouse-benchmark` выводит отчет в указанный JSON-файл. - `--user=USERNAME` — имя пользователя ClickHouse. Значение по умолчанию: `default`. - `--password=PSWD` — пароль пользователя ClickHouse. Значение по умолчанию: пустая строка. -- `--stacktrace` — вывод трассировки стека исключений. +- `--stacktrace` — вывод трассировки стека исключений. Когда этот ключ указан, `clickhouse-bencmark` выводит трассировку стека исключений. - `--stage=WORD` — стадия обработки запроса на сервере. ClickHouse останавливает обработку запроса и возвращает ответ `clickhouse-benchmark` на заданной стадии. Возможные значения: `complete`, `fetch_columns`, `with_mergeable_state`. Значение по умолчанию: `complete`. - `--help` — показывает справку. @@ -94,17 +94,17 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul - Строка статуса, содержащая (в таком же порядке): - - Конечная точка сервера ClickHouse. + - Endpoint сервера ClickHouse. - Число обработанных запросов. - QPS: количество запросов, выполняемых сервером за секунду в течение `--delay` интервала. - RPS: количество строк, читаемых сервером за секунду в течение `--delay` интервала. - MiB/s: количество Мебибайтов, считываемых сервером за секунду в течение `--delay` интервала. - - result RPS: количество столбцов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. 
+ - result RPS: количество строк, добавленное сервером в результат запроса за секунду в течение `--delay` интервала. - result MiB/s. количество Мебибайтов, размещаемое сервером в результат запроса за секунду в течение `--delay` интервала. - Процентили времени выполнения запросов. -## Сравнительный режим {#clickhouse-benchmark-comparison-mode} +## Режим сравнения {#clickhouse-benchmark-comparison-mode} `clickhouse-benchmark` может сравнивать производительность двух работающих серверов ClickHouse. From 6560ec3ed553409fa8d63db9f95287039c218471 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 20 Jan 2021 19:36:18 +0300 Subject: [PATCH 131/697] fix segfault on aggregation when MV has unexpected structure --- src/AggregateFunctions/AggregateFunctionSum.h | 8 ++-- src/Storages/StorageMaterializedView.cpp | 24 ++++++++--- ...ialized_view_different_structure.reference | 6 +++ ..._materialized_view_different_structure.sql | 42 +++++++++++++++++++ 4 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/01182_materialized_view_different_structure.reference create mode 100644 tests/queries/0_stateless/01182_materialized_view_different_structure.sql diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 1038c8107a5..134b7e490d1 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -287,7 +287,7 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - const auto & column = static_cast(*columns[0]); + const auto & column = assert_cast(*columns[0]); if constexpr (is_big_int_v) this->data(place).add(static_cast(column.getData()[row_num])); else @@ -309,7 +309,7 @@ public: } else { - const auto & column = static_cast(*columns[0]); + const auto & column = assert_cast(*columns[0]); this->data(place).addMany(column.getData().data(), batch_size); } } @@ -327,7 +327,7 @@ public: } else { - const auto & column = static_cast(*columns[0]); + const auto & column = assert_cast(*columns[0]); this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size); } } @@ -349,7 +349,7 @@ public: void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { - auto & column = static_cast(to); + auto & column = assert_cast(to); column.getData().push_back(this->data(place).get()); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 61fdbc0198b..9b5a4bad697 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include #include +#include namespace DB @@ -130,7 +132,7 @@ Pipe StorageMaterializedView::read( void StorageMaterializedView::read( QueryPlan & query_plan, const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -139,15 +141,27 @@ void StorageMaterializedView::read( { auto storage = getTargetTable(); auto lock = storage->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto target_metadata_snapshot = storage->getInMemoryMetadataPtr(); if (query_info.order_optimizer) - 
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(target_metadata_snapshot, context); - storage->read(query_plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); + storage->read(query_plan, column_names, target_metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); if (query_plan.isInitialized()) { + auto mv_header = getHeaderForProcessingStage(*this, column_names, metadata_snapshot, query_info, context, processed_stage); + auto target_header = getHeaderForProcessingStage(*storage, column_names, target_metadata_snapshot, query_info, context, processed_stage); + if (!blocksHaveEqualStructure(mv_header, target_header)) + { + auto converting_actions = ActionsDAG::makeConvertingActions(target_header.getColumnsWithTypeAndName(), + mv_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); + converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); + query_plan.addStep(std::move(converting_step)); + } + StreamLocalLimits limits; SizeLimits leaf_limits; @@ -161,7 +175,7 @@ void StorageMaterializedView::read( nullptr, nullptr); - adding_limits_and_quota->setStepDescription("Lock destination table for Buffer"); + adding_limits_and_quota->setStepDescription("Lock destination table for MaterializedView"); query_plan.addStep(std::move(adding_limits_and_quota)); } } diff --git a/tests/queries/0_stateless/01182_materialized_view_different_structure.reference b/tests/queries/0_stateless/01182_materialized_view_different_structure.reference new file mode 100644 index 00000000000..a1f113394b2 --- /dev/null +++ b/tests/queries/0_stateless/01182_materialized_view_different_structure.reference @@ -0,0 +1,6 @@ +4999950000.000000 +4999950000 +1000 499500 499500 999 0 +1000 124716 499500 255 0 +1000 124716 99 0 +2000 249432 255 0 diff --git a/tests/queries/0_stateless/01182_materialized_view_different_structure.sql b/tests/queries/0_stateless/01182_materialized_view_different_structure.sql new file mode 100644 index 00000000000..751bcc9e48e --- /dev/null +++ b/tests/queries/0_stateless/01182_materialized_view_different_structure.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS numbers; +DROP TABLE IF EXISTS test_mv; +DROP TABLE IF EXISTS src; +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv; +DROP TABLE IF EXISTS dist; + +CREATE TABLE test_table (key UInt32, value Decimal(16, 6)) ENGINE = SummingMergeTree() ORDER BY key; +CREATE TABLE numbers (number UInt64) ENGINE=Memory; + +CREATE MATERIALIZED VIEW test_mv TO test_table (number UInt64, value Decimal(38, 6)) +AS SELECT number, sum(number) AS value FROM (SELECT *, toDecimal64(number, 6) AS val FROM numbers) GROUP BY number; + +INSERT INTO numbers SELECT * FROM numbers(100000); + +SELECT sum(value) FROM test_mv; +SELECT sum(value) FROM (SELECT number, sum(number) AS value FROM (SELECT *, toDecimal64(number, 6) AS val FROM numbers) GROUP BY number); + +CREATE TABLE src (n UInt64, s FixedString(16)) ENGINE=Memory; +CREATE TABLE dst (n UInt8, s String) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv TO dst (n String) AS SELECT * FROM src; +SET allow_experimental_bigint_types=1; +CREATE TABLE dist (n Int128) ENGINE=Distributed(test_cluster_two_shards, currentDatabase(), mv); 
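+-- Note: src, dst, the view column list and dist deliberately use different column types
+-- (UInt64/FixedString(16), UInt8/String, String, Int128), so queries through the view and
+-- the Distributed table exercise conversion between the target table structure and the view structure.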
+ +INSERT INTO src SELECT number, toString(number) FROM numbers(1000); +INSERT INTO mv SELECT toString(number + 1000) FROM numbers(1000); -- { serverError 53 } +INSERT INTO mv SELECT arrayJoin(['42', 'test']); -- { serverError 53 } + +SELECT count(), sum(n), sum(toInt64(s)), max(n), min(n) FROM src; +SELECT count(), sum(n), sum(toInt64(s)), max(n), min(n) FROM dst; +SELECT count(), sum(toInt64(n)), max(n), min(n) FROM mv; +SELECT count(), sum(toInt64(n)), max(n), min(n) FROM dist; -- { serverError 70 } +SELECT count(), sum(toInt64(n)), max(toUInt32(n)), min(toInt128(n)) FROM dist; + +DROP TABLE test_table; +DROP TABLE numbers; +DROP TABLE test_mv; +DROP TABLE src; +DROP TABLE dst; +DROP TABLE mv; +DROP TABLE dist; From 8da4aa5bf08a2898b8e40634e1f45f69422d805d Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Wed, 20 Jan 2021 20:10:35 +0300 Subject: [PATCH 132/697] DOCSUP-5272: fix PR and ticket comments --- .../sql-reference/statements/create/table.md | 21 ++++++++++++++----- .../sql-reference/statements/create/table.md | 21 ++++++++++++++----- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 95ac0252eaa..3b7506ae89a 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -111,19 +111,30 @@ It is not possible to set default values for elements in nested data structures. You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways: -- inside the column list +- Inside the column list ``` sql -CREATE TABLE db.table_name (name1 type1, name2 type2, ..., PRIMARY KEY (expr1[, expr2,...])]) ENGINE = engine; +CREATE TABLE db.table_name +( + name1 type1, name2 type2, ..., + PRIMARY KEY(expr1[, expr2,...])] +) +ENGINE = engine; ``` -- outside the column list +- Outside the column list ``` sql -CREATE TABLE db.table_name (name1 type1, name2 type2, ...) ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); +CREATE TABLE db.table_name +( + name1 type1, name2 type2, ... +) +ENGINE = engine +PRIMARY KEY(expr1[, expr2,...]); ``` -You can't combine both ways in one query. +!!! warning "Warning" + You can't combine both ways in one query. ## Constraints {#constraints} diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index e91a7f15903..5244774c58c 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -85,19 +85,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... Вы можете определить [первичный ключ](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) при создании таблицы. Первичный ключ может быть указан двумя способами: -- В списке столбцов: +- в списке столбцов: ``` sql -CREATE TABLE db.table_name (name1 type1, name2 type2, ..., PRIMARY KEY (expr1[, expr2,...])]) ENGINE = engine; +CREATE TABLE db.table_name +( + name1 type1, name2 type2, ..., + PRIMARY KEY(expr1[, expr2,...])] +) +ENGINE = engine; ``` -- Вне списка столбцов: +- вне списка столбцов: ``` sql -CREATE TABLE db.table_name (name1 type1, name2 type2, ...) ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); +CREATE TABLE db.table_name +( + name1 type1, name2 type2, ... 
+) +ENGINE = engine +PRIMARY KEY(expr1[, expr2,...]); ``` -Вы не можете сочетать оба способа в одном запросе. +!!! warning "Предупреждение" + Вы не можете сочетать оба способа в одном запросе. ### Ограничения (constraints) {#constraints} From 801c540f5e755a928337202e5de8cc73cf5fba29 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Jan 2021 20:33:12 +0300 Subject: [PATCH 133/697] Try fix tests. --- src/Processors/QueryPlan/QueryPlan.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 0e16d97d436..92ee5d4a1d2 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -640,6 +640,8 @@ void QueryPlan::optimize() while (tryMergeExpressions(frame.node, frame.node->children.front())); trySplitFilter(frame.node, nodes); + + tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes); } stack.pop(); From 7d61f27abb2cd8ecd8a0d09657035c70441f58e8 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Wed, 20 Jan 2021 20:51:14 +0300 Subject: [PATCH 134/697] Fix ansi.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Разрешаю конфликт с мастером. --- docs/en/sql-reference/ansi.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/ansi.md b/docs/en/sql-reference/ansi.md index 84e47902f3b..18243a5f9f5 100644 --- a/docs/en/sql-reference/ansi.md +++ b/docs/en/sql-reference/ansi.md @@ -25,15 +25,15 @@ The following table lists cases when query feature works in ClickHouse, but beha |------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | **E011** | **Numeric data types** | **Partial**{.text-warning} | | | E011-01 | INTEGER and SMALLINT data types | Yes {.text-success} | | -| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Yes {.text-warning} | | -| E011-03 | DECIMAL and NUMERIC data types | Partial {.text-warning} | Only `DECIMAL(p,s)` is supported, not `NUMERIC` | +| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Yes {.text-success} | | +| E011-03 | DECIMAL and NUMERIC data types | Yes {.text-success} | | | E011-04 | Arithmetic operators | Yes {.text-success} | | | E011-05 | Numeric comparison | Yes {.text-success} | | | E011-06 | Implicit casting among the numeric data types | No {.text-danger} | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast | | **E021** | **Character string types** | **Partial**{.text-warning} | | -| E021-01 | CHARACTER data type | Yes {.text-danger} | | -| E021-02 | CHARACTER VARYING data type | Yes {.text-danger} | | -| E021-03 | Character literals | Partial {.text-warning} | No automatic concatenation of consecutive literals and character set support | +| E021-01 | CHARACTER data type | Yes {.text-success} | | +| E021-02 | CHARACTER VARYING data type | Yes {.text-success} | | +| E021-03 | Character literals | Yes {.text-success} | | | E021-04 | CHARACTER_LENGTH function | Partial {.text-warning} | No `USING` clause | | E021-05 | OCTET_LENGTH function | No {.text-danger} | `LENGTH` behaves similarly | | E021-06 | SUBSTRING 
| Partial {.text-warning} | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant | From 2d87e52b3a2a9db9db6a045b8c24a4dc88a29e5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 Jan 2021 03:24:44 +0300 Subject: [PATCH 135/697] Fix bad formatting of IPv4 addresses --- src/DataTypes/DataTypeNumberBase.h | 8 +++++++- src/DataTypes/IDataType.h | 2 +- .../0_stateless/01656_ipv4_bad_formatting.reference | 4 ++++ tests/queries/0_stateless/01656_ipv4_bad_formatting.sql | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01656_ipv4_bad_formatting.reference create mode 100644 tests/queries/0_stateless/01656_ipv4_bad_formatting.sql diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index 0390da2cb6f..cbbc203bf4f 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -51,7 +51,13 @@ public: bool isParametric() const override { return false; } bool haveSubtypes() const override { return false; } - bool shouldAlignRightInPrettyFormats() const override { return true; } + + bool shouldAlignRightInPrettyFormats() const override + { + /// Just a number, without customizations. Counterexample: IPv4. + return !custom_text_serialization; + } + bool textCanContainOnlyValidUTF8() const override { return true; } bool isComparable() const override { return true; } bool isValueRepresentedByNumber() const override { return true; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index b67c5ee1846..cb9fc7f122c 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -497,7 +497,7 @@ public: /// For all other substreams (like ArraySizes, NullMasks, etc.) we use only /// generic compression codecs like LZ4. 
static bool isSpecialCompressionAllowed(const SubstreamPath & path); -private: +protected: friend class DataTypeFactory; friend class AggregateFunctionSimpleState; /// Customize this DataType diff --git a/tests/queries/0_stateless/01656_ipv4_bad_formatting.reference b/tests/queries/0_stateless/01656_ipv4_bad_formatting.reference new file mode 100644 index 00000000000..a7b5c448f13 --- /dev/null +++ b/tests/queries/0_stateless/01656_ipv4_bad_formatting.reference @@ -0,0 +1,4 @@ +┌─x───────────────┬─y───────────────┬──────────z─┐ +│ 1.1.1.1 │ 1.1.1.1 │ 16843009 │ +│ 255.255.255.255 │ 255.255.255.255 │ 4294967295 │ +└─────────────────┴─────────────────┴────────────┘ diff --git a/tests/queries/0_stateless/01656_ipv4_bad_formatting.sql b/tests/queries/0_stateless/01656_ipv4_bad_formatting.sql new file mode 100644 index 00000000000..a0b253ea31a --- /dev/null +++ b/tests/queries/0_stateless/01656_ipv4_bad_formatting.sql @@ -0,0 +1 @@ +SELECT arrayJoin(['1.1.1.1', '255.255.255.255']) AS x, toIPv4(x) AS y, toUInt32(y) AS z FORMAT PrettyCompactNoEscapes; From 2094eae23df8ae19002ff65af842b5ed6913798b Mon Sep 17 00:00:00 2001 From: feng lv Date: Thu, 21 Jan 2021 04:49:35 +0000 Subject: [PATCH 136/697] fix sleep with infinite input --- src/Functions/sleep.h | 4 ++-- .../queries/0_stateless/01655_sleep_infinite_float.reference | 0 tests/queries/0_stateless/01655_sleep_infinite_float.sql | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01655_sleep_infinite_float.reference create mode 100644 tests/queries/0_stateless/01655_sleep_infinite_float.sql diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 6dca6b5f84c..65566e36d1f 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -78,8 +78,8 @@ public: Float64 seconds = applyVisitor(FieldVisitorConvertToNumber(), assert_cast(*col).getField()); - if (seconds < 0) - throw Exception("Cannot sleep negative amount of time (not implemented)", ErrorCodes::BAD_ARGUMENTS); + if (seconds < 0 || !std::isfinite(seconds)) + throw Exception("Cannot sleep infinite or negative amount of time (not implemented)", ErrorCodes::BAD_ARGUMENTS); size_t size = col->size(); diff --git a/tests/queries/0_stateless/01655_sleep_infinite_float.reference b/tests/queries/0_stateless/01655_sleep_infinite_float.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01655_sleep_infinite_float.sql b/tests/queries/0_stateless/01655_sleep_infinite_float.sql new file mode 100644 index 00000000000..a469ba9674a --- /dev/null +++ b/tests/queries/0_stateless/01655_sleep_infinite_float.sql @@ -0,0 +1,2 @@ +SELECT sleep(nan); -- { serverError 36 } +SELECT sleep(inf); -- { serverError 36 } From 7f68afa362ab8ce926168af86576352fdd0e46a9 Mon Sep 17 00:00:00 2001 From: Winter Zhang Date: Thu, 21 Jan 2021 13:11:07 +0800 Subject: [PATCH 137/697] ISSUES-18684 fix MaterializeMySQL integration failure --- .../test_materialize_mysql_database/materialize_with_ddl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index c04194c8ebb..a4320ee54c5 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -460,6 +460,7 @@ def query_event_with_empty_transaction(clickhouse_node, mysql_node, service_name mysql_node.query("INSERT INTO 
test_database.t1(a) VALUES(2)") mysql_node.query("/* start */ commit /* end */") + check_query(clickhouse_node, "SHOW TABLES FROM test_database FORMAT TSV", "t1\n") check_query(clickhouse_node, "SELECT * FROM test_database.t1 ORDER BY a FORMAT TSV", "1\tBEGIN\n2\tBEGIN\n") clickhouse_node.query("DROP DATABASE test_database") mysql_node.query("DROP DATABASE test_database") From 2edf69fe6459657a0b1783b3236d1ce7445db308 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Thu, 21 Jan 2021 09:17:12 +0300 Subject: [PATCH 138/697] Fix --- docs/en/operations/caches.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 0107c340019..0323fc84ef4 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -19,6 +19,6 @@ Additional cache types: - [dictionaries data cache](../sql-reference/dictionaries/index.md) Not directly used: -- page cache OS +- OS page cache [Original article](https://clickhouse.tech/docs/en/operations/caches/) From 5f3059555a039181cdb9b558fb073fc7998ad496 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 21 Jan 2021 10:26:08 +0300 Subject: [PATCH 139/697] Fix system.parts _state column There was LOGICAL_ERROR when querying this column, due to incorrect order: SELECT *, _state FROM system.parts 2021.01.21 10:22:57.731556 [ 22851 ] {02a07c6d-467d-4681-9203-4dc11cc6fbee} : Logical error: 'Invalid Field get from type String to type UInt64'. --- src/Storages/System/StorageSystemParts.cpp | 7 ++-- .../01660_system_parts_smoke.reference | 14 +++++++ .../0_stateless/01660_system_parts_smoke.sql | 41 +++++++++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01660_system_parts_smoke.reference create mode 100644 tests/queries/0_stateless/01660_system_parts_smoke.sql diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 7ae20ed024e..5b9461b5c25 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -139,9 +139,6 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto columns_[i++]->insertDefault(); } - if (has_state_column) - columns_[i++]->insert(part->stateString()); - MinimalisticDataPartChecksums helper; helper.computeTotalChecksums(part->checksums); @@ -184,6 +181,10 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto columns_[i++]->insert(queryToString(part->default_codec->getCodecDesc())); add_ttl_info_map(part->ttl_infos.recompression_ttl); + + /// _state column should be the latest. 
+ if (has_state_column) + columns_[i++]->insert(part->stateString()); } } diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.reference b/tests/queries/0_stateless/01660_system_parts_smoke.reference new file mode 100644 index 00000000000..f21fab8e539 --- /dev/null +++ b/tests/queries/0_stateless/01660_system_parts_smoke.reference @@ -0,0 +1,14 @@ +# two parts +Committed +Committed +all_1_1_0 Committed +all_2_2_0 Committed +all_1_1_0 1 +all_2_2_0 1 +# optimize +2 Outdated +1 Committed +# truncate +Outdated +Outdated +# drop diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.sql b/tests/queries/0_stateless/01660_system_parts_smoke.sql new file mode 100644 index 00000000000..8a1b0a12f81 --- /dev/null +++ b/tests/queries/0_stateless/01660_system_parts_smoke.sql @@ -0,0 +1,41 @@ +-- There is different code path when: +-- - _state is not requested +-- - _state is requested +-- - only _state is requested +SELECT * FROM system.parts FORMAT Null; +SELECT *, _state FROM system.parts FORMAT Null; +SELECT _state FROM system.parts FORMAT Null; + +-- Create one table and see some columns in system.parts +DROP TABLE IF EXISTS data_01660; +CREATE TABLE data_01660 (key Int) Engine=MergeTree() ORDER BY key; +SYSTEM STOP MERGES data_01660; + +-- Empty +SELECT _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; +SELECT name, _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; +SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; + +-- Add part and check again +SELECT '# two parts'; +INSERT INTO data_01660 VALUES (0); +INSERT INTO data_01660 VALUES (1); +SELECT _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; +SELECT name, _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; +SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; + +-- OPTIMIZE to create Outdated parts +SELECT '# optimize'; +SYSTEM START MERGES data_01660; +OPTIMIZE TABLE data_01660 FINAL; +SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state; + +-- TRUNCATE does not remove parts instantly +SELECT '# truncate'; +TRUNCATE data_01660; +SELECT _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; + +-- But DROP does +SELECT '# drop'; +DROP TABLE data_01660; +SELECT * FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; From 58c57bbb9dc719f5980bec6a6ad702b79b42135e Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 20 Jan 2021 20:08:16 +0100 Subject: [PATCH 140/697] Allow building librdkafka without ssl --- cmake/find/rdkafka.cmake | 6 ++-- contrib/librdkafka-cmake/CMakeLists.txt | 40 +++++++++++++++++++------ contrib/librdkafka-cmake/config.h.in | 6 ++-- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/cmake/find/rdkafka.cmake b/cmake/find/rdkafka.cmake index 26005acc4d4..bf7028feb02 100644 --- a/cmake/find/rdkafka.cmake +++ b/cmake/find/rdkafka.cmake @@ -1,9 +1,7 @@ -if (NOT ARCH_ARM AND OPENSSL_FOUND) +if (NOT ARCH_ARM) option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES}) -elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL") elseif(ENABLE_RDKAFKA) - message (${RECONFIGURE_MESSAGE_LEVEL} "librdafka is not supported on ARM and on FreeBSD") + message (${RECONFIGURE_MESSAGE_LEVEL} 
"librdafka is not supported on ARM") endif () if (NOT ENABLE_RDKAFKA) diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 1d9e839e8cf..2b55b22cd2b 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -50,12 +50,12 @@ set(SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_request.c ${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c -# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c # optionally included below - ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c +# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c # optionally included below +# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c # optionally included below ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c - ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c +# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c # optionally included below # ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_win32.c - ${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c +# ${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c # optionally included below ${RDKAFKA_SOURCE_DIR}/rdkafka_sticky_assignor.c ${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c ${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c @@ -82,10 +82,33 @@ set(SRCS if(${ENABLE_CYRUS_SASL}) message (STATUS "librdkafka with SASL support") - set(SRCS - ${SRCS} - ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c # needed to support Kerberos, requires cyrus-sasl - ) + set(WITH_SASL_CYRUS 1) +endif() + +if(OPENSSL_FOUND) + message (STATUS "librdkafka with SSL support") + set(WITH_SSL 1) + + if(${ENABLE_CYRUS_SASL}) + set(WITH_SASL_SCRAM 1) + set(WITH_SASL_OAUTHBEARER 1) + endif() +endif() + +if(WITH_SSL) + list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c) +endif() + +if(WITH_SASL_CYRUS) + list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c) # needed to support Kerberos, requires cyrus-sasl +endif() + +if(WITH_SASL_SCRAM) + list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c) +endif() + +if(WITH_SASL_OAUTHBEARER) + list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c) endif() add_library(rdkafka ${SRCS}) @@ -101,7 +124,6 @@ if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY) endif() if(${ENABLE_CYRUS_SASL}) target_link_libraries(rdkafka PRIVATE ${CYRUS_SASL_LIBRARY}) - set(WITH_SASL_CYRUS 1) endif() file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/auxdir) diff --git a/contrib/librdkafka-cmake/config.h.in b/contrib/librdkafka-cmake/config.h.in index 1c9057bd794..29e833959f7 100644 --- a/contrib/librdkafka-cmake/config.h.in +++ b/contrib/librdkafka-cmake/config.h.in @@ -60,11 +60,11 @@ // WITH_SOCKEM #define WITH_SOCKEM 1 // libssl -#define WITH_SSL 1 +#cmakedefine WITH_SSL 1 // WITH_SASL_SCRAM -#define WITH_SASL_SCRAM 1 +#cmakedefine WITH_SASL_SCRAM 1 // WITH_SASL_OAUTHBEARER -#define WITH_SASL_OAUTHBEARER 1 +#cmakedefine WITH_SASL_OAUTHBEARER 1 #cmakedefine WITH_SASL_CYRUS 1 // crc32chw #if !defined(__PPC__) From 1bb8cc5c9ab8659b97082abc2a678cbe857c92ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 Jan 2021 11:10:31 +0300 Subject: [PATCH 141/697] Avoid UBSan report in arrayElement --- src/Functions/array/arrayElement.cpp | 27 ++++++++++++++----- .../01657_array_element_ubsan.reference | 26 ++++++++++++++++++ .../0_stateless/01657_array_element_ubsan.sql | 19 +++++++++++++ 3 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/01657_array_element_ubsan.reference create mode 100644 tests/queries/0_stateless/01657_array_element_ubsan.sql diff --git a/src/Functions/array/arrayElement.cpp 
b/src/Functions/array/arrayElement.cpp index 88166f04e0e..7d053988cae 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -231,7 +231,7 @@ struct ArrayElementNumImpl if (builder) builder.update(j); } - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) { size_t j = offsets[i] + index; result[i] = data[j]; @@ -329,7 +329,7 @@ struct ArrayElementStringImpl TIndex index = indices[i]; if (index > 0 && static_cast(index) <= array_size) adjusted_index = index - 1; - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) adjusted_index = array_size + index; else adjusted_index = array_size; /// means no element should be taken @@ -427,7 +427,7 @@ struct ArrayElementGenericImpl if (builder) builder.update(j); } - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) { size_t j = offsets[i] + index; result.insertFrom(data, j); @@ -472,11 +472,24 @@ ColumnPtr FunctionArrayElement::executeNumberConst( auto col_res = ColumnVector::create(); if (index.getType() == Field::Types::UInt64) + { ArrayElementNumImpl::template vectorConst( col_nested->getData(), col_array->getOffsets(), safeGet(index) - 1, col_res->getData(), builder); + } else if (index.getType() == Field::Types::Int64) + { + /// Cast to UInt64 before negation allows to avoid undefined behaviour for negation of the most negative number. + /// NOTE: this would be undefined behaviour in C++ sense, but nevertheless, compiler cannot see it on user provided data, + /// and generates the code that we want on supported CPU architectures (overflow in sense of two's complement arithmetic). + /// This is only needed to avoid UBSan report. + + /// Negative array indices work this way: + /// arr[-1] is the element at offset 0 from the last + /// arr[-2] is the element at offset 1 from the last and so on. 
+ ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), -safeGet(index) - 1, col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), -(UInt64(safeGet(index)) + 1), col_res->getData(), builder); + } else throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); @@ -534,7 +547,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -safeGet(index) - 1, + -(UInt64(safeGet(index)) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -588,7 +601,7 @@ ColumnPtr FunctionArrayElement::executeGenericConst( col_nested, col_array->getOffsets(), safeGet(index) - 1, *col_res, builder); else if (index.getType() == Field::Types::Int64) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), -safeGet(index) - 1, *col_res, builder); + col_nested, col_array->getOffsets(), -(UInt64(safeGet(index) + 1)), *col_res, builder); else throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); @@ -639,7 +652,7 @@ ColumnPtr FunctionArrayElement::executeConst(const ColumnsWithTypeAndName & argu if (builder) builder.update(j); } - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) { size_t j = array_size + index; res->insertFrom(array_elements, j); diff --git a/tests/queries/0_stateless/01657_array_element_ubsan.reference b/tests/queries/0_stateless/01657_array_element_ubsan.reference new file mode 100644 index 00000000000..14e3161f529 --- /dev/null +++ b/tests/queries/0_stateless/01657_array_element_ubsan.reference @@ -0,0 +1,26 @@ +0 +0 +0 +0 +--- +0 +0 +0 +--- +0 +0 +0 +0 +0 +0 +0 +1 +--- +0 +0 +0 +0 +0 +0 +0 +1 diff --git a/tests/queries/0_stateless/01657_array_element_ubsan.sql b/tests/queries/0_stateless/01657_array_element_ubsan.sql new file mode 100644 index 00000000000..82ddf643389 --- /dev/null +++ b/tests/queries/0_stateless/01657_array_element_ubsan.sql @@ -0,0 +1,19 @@ +SELECT [number][10000000000] FROM numbers(1); +SELECT [number][-10000000000] FROM numbers(1); + +SELECT [number][-0x8000000000000000] FROM numbers(1); +SELECT [number][0xFFFFFFFFFFFFFFFF] FROM numbers(1); + +SELECT '---'; + +SELECT [materialize(1)][0xFFFFFFFFFFFFFFFF]; +SELECT [materialize(1)][materialize(18446744073709551615)]; +SELECT [materialize(1)][-0x8000000000000000]; + +SELECT '---'; + +SELECT [number][arrayJoin([-0x8000000000000000, -10000000000, 0, -1])] FROM numbers(2); + +SELECT '---'; + +SELECT [number][arrayJoin([0xFFFFFFFFFFFFFFFF, 10000000000, 0, 1])] FROM numbers(2); From 110089086bbf0148743a92ef14fe5f51e287b5a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 Jan 2021 11:41:19 +0300 Subject: [PATCH 142/697] Fix UBSan report in GatherUtils #19287 --- src/Functions/GatherUtils/Algorithms.h | 6 +++--- .../0_stateless/01658_substring_ubsan.reference | 0 tests/queries/0_stateless/01658_substring_ubsan.sql | 10 ++++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01658_substring_ubsan.reference create mode 100644 tests/queries/0_stateless/01658_substring_ubsan.sql diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h index 620d6439af2..101e1354bc6 100644 --- a/src/Functions/GatherUtils/Algorithms.h +++ b/src/Functions/GatherUtils/Algorithms.h @@ -342,7 +342,7 @@ void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, 
const IC
             if (offset > 0)
                 slice = src.getSliceFromLeft(offset - 1);
             else
-                slice = src.getSliceFromRight(-offset);
+                slice = src.getSliceFromRight(-UInt64(offset));
 
             writeSlice(slice, sink);
         }
@@ -374,7 +374,7 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
         Int64 size = has_length ? length_nested_column->getInt(row_num) : static_cast(src.getElementSize());
 
         if (size < 0)
-            size += offset > 0 ? static_cast(src.getElementSize()) - (offset - 1) : -offset;
+            size += offset > 0 ? static_cast(src.getElementSize()) - (offset - 1) : -UInt64(offset);
 
         if (offset != 0 && size > 0)
         {
@@ -383,7 +383,7 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
             if (offset > 0)
                 slice = src.getSliceFromLeft(offset - 1, size);
             else
-                slice = src.getSliceFromRight(-offset, size);
+                slice = src.getSliceFromRight(-UInt64(offset), size);
 
             writeSlice(slice, sink);
         }
diff --git a/tests/queries/0_stateless/01658_substring_ubsan.reference b/tests/queries/0_stateless/01658_substring_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01658_substring_ubsan.sql b/tests/queries/0_stateless/01658_substring_ubsan.sql
new file mode 100644
index 00000000000..3d7968b8d6b
--- /dev/null
+++ b/tests/queries/0_stateless/01658_substring_ubsan.sql
@@ -0,0 +1,10 @@
+/** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size:
+  * substring:
+  *      hello
+  * ^-----^ - offset -10, length 7, result: "he"
+  * substringUTF8:
+  *      hello
+  *   ^-----^ - offset -10, length 7, result: "hello"
+  * This may be subject for change.
+  */
+SELECT substringUTF8('hello, привет', -9223372036854775808, number) FROM numbers(16) FORMAT Null;

From 0d20b4575da223d899650b0eea751c7f4b5e800c Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 21 Jan 2021 11:41:32 +0300
Subject: [PATCH 143/697] Fix test.

---
 src/Functions/now64.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp
index ac418312698..3d02885c726 100644
--- a/src/Functions/now64.cpp
+++ b/src/Functions/now64.cpp
@@ -133,7 +133,10 @@ public:
 
     FunctionBaseImplPtr build(const ColumnsWithTypeAndName &, const DataTypePtr & result_type) const override
     {
-        const UInt32 scale = assert_cast(result_type.get())->getScale();
+        UInt32 scale = DataTypeDateTime64::default_scale;
+        if (const auto * type = typeid_cast(result_type.get()))
+            scale = type->getScale();
+
         return std::make_unique(nowSubsecond(scale), result_type);
     }
 };

From 9ae3628b662f5f9b34695a3f4f7b07307323fc8b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 21 Jan 2021 11:49:40 +0300
Subject: [PATCH 144/697] Fix test.
--- src/Functions/now64.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index 3d02885c726..feb821fde82 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -134,7 +135,8 @@ public: FunctionBaseImplPtr build(const ColumnsWithTypeAndName &, const DataTypePtr & result_type) const override { UInt32 scale = DataTypeDateTime64::default_scale; - if (const auto * type = typeid_cast(result_type.get())) + auto res_type = removeNullable(result_type); + if (const auto * type = typeid_cast(res_type.get())) scale = type->getScale(); return std::make_unique(nowSubsecond(scale), result_type); From af7dca7bb78a579f0047a2197d823306c4b4887c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 21 Jan 2021 11:54:56 +0300 Subject: [PATCH 145/697] Update perftest. --- tests/performance/split_filter.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/performance/split_filter.xml b/tests/performance/split_filter.xml index 7bd4af51abd..4b503a6645b 100644 --- a/tests/performance/split_filter.xml +++ b/tests/performance/split_filter.xml @@ -1,4 +1,4 @@ - select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000)) where y = 0 settings enable_optimize_predicate_expression=0 - select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0 + select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(200000000)) where y = 0 settings enable_optimize_predicate_expression=0 + select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(200000000) limit 200000000) where y = 0 From e75b116466328df4ffb9144435a05e0ad9f714d9 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 21 Jan 2021 17:01:35 +0800 Subject: [PATCH 146/697] Rewrite `sum(if())` and `sumIf` to `countIf` in special cases (#17041) Co-authored-by: vdimir --- src/Core/Settings.h | 1 + src/Interpreters/InDepthNodeVisitor.h | 4 +- .../RewriteSumIfFunctionVisitor.cpp | 91 +++++++++++++++++++ .../RewriteSumIfFunctionVisitor.h | 30 ++++++ src/Interpreters/TreeOptimizer.cpp | 11 +++ src/Interpreters/ya.make | 1 + tests/performance/rewrite_sumIf.xml | 4 + .../01646_rewrite_sum_if.reference | 24 +++++ .../0_stateless/01646_rewrite_sum_if.sql | 35 +++++++ 9 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 src/Interpreters/RewriteSumIfFunctionVisitor.cpp create mode 100644 src/Interpreters/RewriteSumIfFunctionVisitor.h create mode 100644 tests/performance/rewrite_sumIf.xml create mode 100644 tests/queries/0_stateless/01646_rewrite_sum_if.reference create mode 100644 tests/queries/0_stateless/01646_rewrite_sum_if.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6ebdaaa4c84..cc32417af09 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -416,6 +416,7 @@ class IColumn; M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated experimental parser", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ \ + M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. 
*/ \ \ M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ diff --git a/src/Interpreters/InDepthNodeVisitor.h b/src/Interpreters/InDepthNodeVisitor.h index 7a793566cdd..3ba25a327c4 100644 --- a/src/Interpreters/InDepthNodeVisitor.h +++ b/src/Interpreters/InDepthNodeVisitor.h @@ -68,11 +68,11 @@ struct NeedChild }; /// Simple matcher for one node type. Use need_child function for complex traversal logic. -template +template class OneTypeMatcher { public: - using Data = Data_; + using Data = DataImpl; using TypeToVisit = typename Data::TypeToVisit; static bool needChildVisit(const ASTPtr & node, const ASTPtr & child) { return need_child(node, child); } diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp new file mode 100644 index 00000000000..b856f9164e0 --- /dev/null +++ b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * func = ast->as()) + visit(*func, ast, data); +} + +static ASTPtr createNewFunctionWithOneArgument(const String & func_name, const ASTPtr & argument) +{ + auto new_func = std::make_shared(); + new_func->name = func_name; + + auto new_arguments = std::make_shared(); + new_arguments->children.push_back(argument); + new_func->arguments = new_arguments; + new_func->children.push_back(new_arguments); + return new_func; +} + +void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &) +{ + if (!func.arguments || func.arguments->children.empty()) + return; + + auto lower_name = Poco::toLower(func.name); + + if (lower_name != "sum" && lower_name != "sumif") + return; + + auto & func_arguments = func.arguments->children; + + if (lower_name == "sumif") + { + /// sumIf(1, cond) -> countIf(cond) + const auto * literal = func_arguments[0]->as(); + if (func_arguments.size() == 2 && literal && literal->value.get() == 1) + { + auto new_func = createNewFunctionWithOneArgument("countIf", func_arguments[1]); + new_func->setAlias(func.alias); + ast = std::move(new_func); + return; + } + } + + else + { + const auto * nested_func = func_arguments[0]->as(); + + if (!nested_func || Poco::toLower(nested_func->name) != "if" || nested_func->arguments->children.size() != 3) + return; + + auto & if_arguments = nested_func->arguments->children; + + const auto * first_literal = if_arguments[1]->as(); + const auto * second_literal = if_arguments[2]->as(); + + if (first_literal && second_literal) + { + auto first_value = first_literal->value.get(); + auto second_value = second_literal->value.get(); + /// sum(if(cond, 1, 0)) -> countIf(cond) + if (first_value == 1 && second_value == 0) + { + auto new_func = createNewFunctionWithOneArgument("countIf", if_arguments[0]); + new_func->setAlias(func.alias); + ast = std::move(new_func); + return; + } + /// sum(if(cond, 0, 1)) -> countIf(not(cond)) + if (first_value == 0 && second_value == 1) + { + auto not_func = createNewFunctionWithOneArgument("not", if_arguments[0]); + auto new_func = createNewFunctionWithOneArgument("countIf", not_func); + new_func->setAlias(func.alias); + ast = std::move(new_func); + return; + } + } + } + +} + +} diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.h b/src/Interpreters/RewriteSumIfFunctionVisitor.h new file mode 100644 index 00000000000..86aeef65377 --- /dev/null +++ b/src/Interpreters/RewriteSumIfFunctionVisitor.h @@ 
-0,0 +1,30 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ + +class ASTFunction; + +/// Rewrite 'sum(if())' and 'sumIf' functions to counIf. +/// sumIf(1, cond) -> countIf(1, cond) +/// sum(if(cond, 1, 0)) -> countIf(cond) +/// sum(if(cond, 0, 1)) -> countIf(not(cond)) +class RewriteSumIfFunctionMatcher +{ +public: + struct Data + { + }; + + static void visit(ASTPtr & ast, Data &); + static void visit(const ASTFunction &, ASTPtr & ast, Data &); + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } +}; + +using RewriteSumIfFunctionVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index cee19c632fa..2347ab0d4a5 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -28,6 +28,7 @@ #include #include +#include namespace DB { @@ -548,6 +549,13 @@ void optimizeAnyFunctions(ASTPtr & query) RewriteAnyFunctionVisitor(data).visit(query); } +void optimizeSumIfFunctions(ASTPtr & query) +{ + RewriteSumIfFunctionVisitor::Data data = {}; + RewriteSumIfFunctionVisitor(data).visit(query); +} + + void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context) { RemoveInjectiveFunctionsVisitor::Data data = {context}; @@ -608,6 +616,9 @@ void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & sou if (settings.optimize_move_functions_out_of_any) optimizeAnyFunctions(query); + if (settings.optimize_rewrite_sum_if_to_count_if) + optimizeSumIfFunctions(query); + /// Remove injective functions inside uniq if (settings.optimize_injective_functions_inside_uniq) optimizeInjectiveFunctionsInsideUniq(query, context); diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 77ca6bc0e14..1cadc447e59 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -129,6 +129,7 @@ SRCS( RequiredSourceColumnsData.cpp RequiredSourceColumnsVisitor.cpp RewriteAnyFunctionVisitor.cpp + RewriteSumIfFunctionVisitor.cpp RowRefs.cpp Set.cpp SetVariants.cpp diff --git a/tests/performance/rewrite_sumIf.xml b/tests/performance/rewrite_sumIf.xml new file mode 100644 index 00000000000..4ba4916bdbf --- /dev/null +++ b/tests/performance/rewrite_sumIf.xml @@ -0,0 +1,4 @@ + + SELECT sumIf(1, 0) FROM numbers(100000000) + SELECT sumIf(1, 1) FROM numbers(100000000) + diff --git a/tests/queries/0_stateless/01646_rewrite_sum_if.reference b/tests/queries/0_stateless/01646_rewrite_sum_if.reference new file mode 100644 index 00000000000..0f315b0812a --- /dev/null +++ b/tests/queries/0_stateless/01646_rewrite_sum_if.reference @@ -0,0 +1,24 @@ +0 +0 0 1 +0 +50 +50 50 1 +50 +50 +50 50 50 1 0 +50 +50 +50 50 50 1 0 +50 +0 +0 0 1 +0 +50 +50 50 1 +50 +50 +50 50 50 1 0 +50 +50 +50 50 50 1 0 +50 diff --git a/tests/queries/0_stateless/01646_rewrite_sum_if.sql b/tests/queries/0_stateless/01646_rewrite_sum_if.sql new file mode 100644 index 00000000000..07fb90c0eb7 --- /dev/null +++ b/tests/queries/0_stateless/01646_rewrite_sum_if.sql @@ -0,0 +1,35 @@ +SET optimize_rewrite_sum_if_to_count_if = 0; + +SELECT sumIf(1, number % 2 > 2) FROM numbers(100); +SELECT sumIf(1 as one_expr, number % 2 > 2 as cond_expr), sum(cond_expr), one_expr FROM numbers(100); +SELECT countIf(number % 2 > 2) FROM numbers(100); + +SELECT sumIf(1, number % 2 == 0) FROM numbers(100); +SELECT sumIf(1 as one_expr, number % 2 == 0 as cond_expr), sum(cond_expr), one_expr FROM numbers(100); +SELECT countIf(number % 2 == 0) FROM numbers(100); + +SELECT sum(if(number % 2 == 0, 1, 0)) FROM 
numbers(100); +SELECT sum(if(number % 2 == 0 as cond_expr, 1 as one_expr, 0 as zero_expr) as if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); +SELECT countIf(number % 2 == 0) FROM numbers(100); + +SELECT sum(if(number % 2 == 0, 0, 1)) FROM numbers(100); +SELECT sum(if(number % 2 == 0 as cond_expr, 0 as zero_expr, 1 as one_expr) as if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); +SELECT countIf(number % 2 != 0) FROM numbers(100); + +SET optimize_rewrite_sum_if_to_count_if = 1; + +SELECT sumIf(1, number % 2 > 2) FROM numbers(100); +SELECT sumIf(1 as one_expr, number % 2 > 2 as cond_expr), sum(cond_expr), one_expr FROM numbers(100); +SELECT countIf(number % 2 > 2) FROM numbers(100); + +SELECT sumIf(1, number % 2 == 0) FROM numbers(100); +SELECT sumIf(1 as one_expr, number % 2 == 0 as cond_expr), sum(cond_expr), one_expr FROM numbers(100); +SELECT countIf(number % 2 == 0) FROM numbers(100); + +SELECT sum(if(number % 2 == 0, 1, 0)) FROM numbers(100); +SELECT sum(if(number % 2 == 0 as cond_expr, 1 as one_expr, 0 as zero_expr) as if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); +SELECT countIf(number % 2 == 0) FROM numbers(100); + +SELECT sum(if(number % 2 == 0, 0, 1)) FROM numbers(100); +SELECT sum(if(number % 2 == 0 as cond_expr, 0 as zero_expr, 1 as one_expr) as if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); +SELECT countIf(number % 2 != 0) FROM numbers(100); From fe6b964b32c94b85b001cb40372d8cdc780367f5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 21 Jan 2021 12:39:46 +0300 Subject: [PATCH 147/697] Revert "Revert "Auto version update to [21.2.1.1] [54446]"" This reverts commit 42f63e14b5b5adaaf72f32a19ec04b9599880605. --- cmake/autogenerated_versions.txt | 10 +++++----- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- .../System/StorageSystemContributors.generated.cpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index bc06286a1ad..1c2e4c1f55e 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54445) +SET(VERSION_REVISION 54446) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 1) +SET(VERSION_MINOR 2) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1) -SET(VERSION_DESCRIBE v21.1.1.5646-prestable) -SET(VERSION_STRING 21.1.1.5646) +SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7) +SET(VERSION_DESCRIBE v21.2.1.1-prestable) +SET(VERSION_STRING 21.2.1.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index 3a267a83c69..1cec020f026 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.1.0) unstable; urgency=low +clickhouse (21.2.1.1) unstable; urgency=low * Modified source code - -- Alexey Milovidov Mon, 11 Jan 2021 03:51:08 +0300 + -- clickhouse-release Mon, 11 Jan 2021 11:12:08 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index ddfe3cd177b..5022687c47b 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.1.0 +ARG version=21.2.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile 
b/docker/server/Dockerfile index fa9c0ae5f3a..3cec94b3c66 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.1.0 +ARG version=21.2.1.* ARG gosu_ver=1.10 # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 2e17151b31f..df918928f99 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.1.0 +ARG version=21.2.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index ee39390a0f5..0c50e452e95 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,4 +1,4 @@ -// autogenerated by src/Storages/System/StorageSystemContributors.sh +// autogenerated by ./StorageSystemContributors.sh const char * auto_contributors[] { "0xflotus", "20018712", From 7f32926a218282891cbc6c5082267b7b5db3b867 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 21 Jan 2021 14:37:20 +0300 Subject: [PATCH 148/697] Fix race condition in TestKeeperHandler on session finish --- src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp | 10 +++++++++- src/Common/ZooKeeper/TestKeeperStorageDispatcher.h | 2 ++ src/Server/TestKeeperTCPHandler.cpp | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp index b1233fc47e3..35378e4ff09 100644 --- a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp +++ b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp @@ -49,7 +49,7 @@ void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordina std::lock_guard lock(session_to_response_callback_mutex); auto session_writer = session_to_response_callback.find(session_id); if (session_writer == session_to_response_callback.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id); + return; session_writer->second(response); /// Session closed, no more writes @@ -128,4 +128,12 @@ void TestKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperR throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id); } +void TestKeeperStorageDispatcher::finishSession(int64_t session_id) +{ + std::lock_guard lock(session_to_response_callback_mutex); + auto session_it = session_to_response_callback.find(session_id); + if (session_it != session_to_response_callback.end()) + session_to_response_callback.erase(session_it); +} + } diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h index 27abf17ac73..a86895b5be1 100644 --- a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h +++ b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h @@ -53,6 +53,8 @@ public: return storage.getSessionID(); } void registerSession(int64_t session_id, ZooKeeperResponseCallback callback); + /// Call if we don't need any responses for this session no more (session was expired) + void finishSession(int64_t session_id); }; } diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp index 
7b02996019e..bf407ba96b7 100644 --- a/src/Server/TestKeeperTCPHandler.cpp +++ b/src/Server/TestKeeperTCPHandler.cpp @@ -390,7 +390,11 @@ void TestKeeperTCPHandler::finish() { Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = close_xid; + /// Put close request (so storage will remove all info about session) test_keeper_storage_dispatcher->putRequest(request, session_id); + /// We don't need any callbacks because session can be already dead and + /// nobody wait for response + test_keeper_storage_dispatcher->finishSession(session_id); } std::pair TestKeeperTCPHandler::receiveRequest() From 2cd04e8923051323b906c33786ccac58cff87eae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 Jan 2021 14:42:34 +0300 Subject: [PATCH 149/697] Fix UBSan report in arraySum --- src/Functions/array/arrayAggregation.cpp | 3 ++- .../0_stateless/01659_array_aggregation_ubsan.reference | 1 + tests/queries/0_stateless/01659_array_aggregation_ubsan.sql | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01659_array_aggregation_ubsan.reference create mode 100644 tests/queries/0_stateless/01659_array_aggregation_ubsan.sql diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index 992a331d05b..40afd657abb 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -5,6 +5,7 @@ #include #include "FunctionArrayMapped.h" #include +#include namespace DB @@ -121,7 +122,7 @@ struct ArrayAggregateImpl } template - static bool executeType(const ColumnPtr & mapped, const ColumnArray::Offsets & offsets, ColumnPtr & res_ptr) + static NO_SANITIZE_UNDEFINED bool executeType(const ColumnPtr & mapped, const ColumnArray::Offsets & offsets, ColumnPtr & res_ptr) { using Result = ArrayAggregateResult; using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; diff --git a/tests/queries/0_stateless/01659_array_aggregation_ubsan.reference b/tests/queries/0_stateless/01659_array_aggregation_ubsan.reference new file mode 100644 index 00000000000..62c80bed251 --- /dev/null +++ b/tests/queries/0_stateless/01659_array_aggregation_ubsan.reference @@ -0,0 +1 @@ +446744073709551616 diff --git a/tests/queries/0_stateless/01659_array_aggregation_ubsan.sql b/tests/queries/0_stateless/01659_array_aggregation_ubsan.sql new file mode 100644 index 00000000000..1b8b506b26e --- /dev/null +++ b/tests/queries/0_stateless/01659_array_aggregation_ubsan.sql @@ -0,0 +1 @@ +SELECT arraySum([-9000000000000000000, -9000000000000000000]); From 47a0f4e16280b996d0315ec55b9546564e2806cf Mon Sep 17 00:00:00 2001 From: Ildus Kurbangaliev Date: Tue, 24 Nov 2020 16:25:45 +0500 Subject: [PATCH 150/697] Add tuple argument support for argMin and argMax --- .../aggregate-functions/reference/argmax.md | 24 ++- .../aggregate-functions/reference/argmin.md | 12 +- .../data-types/simpleaggregatefunction.md | 2 + .../AggregateFunctionArgMinMax.h | 64 ++++-- src/AggregateFunctions/Helpers.h | 6 + src/AggregateFunctions/HelpersMinMaxAny.h | 80 ++++---- src/AggregateFunctions/IAggregateFunction.h | 194 ++++++++++++++---- .../DataTypeCustomSimpleAggregateFunction.cpp | 3 +- .../0_stateless/00027_argMinMax.reference | 5 + tests/queries/0_stateless/00027_argMinMax.sql | 8 + .../00027_simple_argMinArray.reference | 1 - .../0_stateless/00027_simple_argMinArray.sql | 1 - .../00915_simple_aggregate_function.reference | 6 +- 
.../00915_simple_aggregate_function.sql | 18 +- 14 files changed, 310 insertions(+), 114 deletions(-) create mode 100644 tests/queries/0_stateless/00027_argMinMax.reference create mode 100644 tests/queries/0_stateless/00027_argMinMax.sql delete mode 100644 tests/queries/0_stateless/00027_simple_argMinArray.reference delete mode 100644 tests/queries/0_stateless/00027_simple_argMinArray.sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 3093a4f67ef..35e87d49e60 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -4,6 +4,28 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Syntax: `argMax(arg, val)` +Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))` Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output. + +Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`. + +**Example:** + +``` text +┌─user─────┬─salary─┐ +│ director │ 5000 │ +│ manager │ 3000 │ +│ worker │ 1000 │ +└──────────┴────────┘ +``` + +``` sql +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary +``` + +``` text +┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 315c7b6c29a..72c9bce6817 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -4,10 +4,12 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Syntax: `argMin(arg, val)` +Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))` Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`. + **Example:** ``` text @@ -19,11 +21,11 @@ Calculates the `arg` value for a minimal `val` value. 
If there are several diffe ``` ``` sql -SELECT argMin(user, salary) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary ``` ``` text -┌─argMin(user, salary)─┐ -│ worker │ -└──────────────────────┘ +┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ +│ worker │ ('worker',1000) │ +└──────────────────────┴─────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index b23ab5a6717..2d2746f85d3 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -18,6 +18,8 @@ The following aggregate functions are supported: - [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) - [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) +- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) +- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 9470b1b8692..67f21db0240 100644 --- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -1,14 +1,16 @@ #pragma once -#include -#include -#include #include // SingleValueDataString used in embedded compiler +#include +#include +#include +#include +#include +#include "Columns/IColumn.h" namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -22,37 +24,49 @@ struct AggregateFunctionArgMinMaxData using ResultData_t = ResultData; using ValueData_t = ValueData; - ResultData result; // the argument at which the minimum/maximum value is reached. - ValueData value; // value for which the minimum/maximum is calculated. + ResultData result; // the argument at which the minimum/maximum value is reached. + ValueData value; // value for which the minimum/maximum is calculated. - static bool allocatesMemoryInArena() - { - return ResultData::allocatesMemoryInArena() || ValueData::allocatesMemoryInArena(); - } + static bool allocatesMemoryInArena() { return ResultData::allocatesMemoryInArena() || ValueData::allocatesMemoryInArena(); } + + static String name() { return StringRef(ValueData_t::name()) == StringRef("min") ? "argMin" : "argMax"; } }; /// Returns the first arg value found for the minimum/maximum value. Example: argMax(arg, value). 
template -class AggregateFunctionArgMinMax final : public IAggregateFunctionDataHelper> +class AggregateFunctionArgMinMax final : public IAggregateFunctionTupleArgHelper, 2> { private: const DataTypePtr & type_res; const DataTypePtr & type_val; + bool tuple_argument; + + using Base = IAggregateFunctionTupleArgHelper, 2>; public: - AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_) - : IAggregateFunctionDataHelper>({type_res_, type_val_}, {}), - type_res(this->argument_types[0]), type_val(this->argument_types[1]) + AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_, const bool tuple_argument_) + : Base({type_res_, type_val_}, {}, tuple_argument_) + , type_res(this->argument_types[0]) + , type_val(this->argument_types[1]) { if (!type_val->isComparable()) - throw Exception("Illegal type " + type_val->getName() + " of second argument of aggregate function " + getName() - + " because the values of that data type are not comparable", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception( + "Illegal type " + type_val->getName() + " of second argument of aggregate function " + getName() + + " because the values of that data type are not comparable", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + this->tuple_argument = tuple_argument_; } - String getName() const override { return StringRef(Data::ValueData_t::name()) == StringRef("min") ? "argMin" : "argMax"; } + String getName() const override { return Data::name(); } DataTypePtr getReturnType() const override { + if (tuple_argument) + { + return std::make_shared(DataTypes{this->type_res, this->type_val}); + } + return type_res; } @@ -80,15 +94,21 @@ public: this->data(place).value.read(buf, *type_val, arena); } - bool allocatesMemoryInArena() const override - { - return Data::allocatesMemoryInArena(); - } + bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); } void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { - this->data(place).result.insertResultInto(to); + if (tuple_argument) + { + auto & tup = assert_cast(to); + + this->data(place).result.insertResultInto(tup.getColumn(0)); + this->data(place).value.insertResultInto(tup.getColumn(1)); + } + else + this->data(place).result.insertResultInto(to); } }; + } diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h index fb727bf98b0..2b21b745a0e 100644 --- a/src/AggregateFunctions/Helpers.h +++ b/src/AggregateFunctions/Helpers.h @@ -31,6 +31,12 @@ M(Float32) \ M(Float64) +#define FOR_DECIMAL_TYPES(M) \ + M(Decimal32) \ + M(Decimal64) \ + M(Decimal128) + + namespace DB { diff --git a/src/AggregateFunctions/HelpersMinMaxAny.h b/src/AggregateFunctions/HelpersMinMaxAny.h index dc165f50d8e..e995f52f498 100644 --- a/src/AggregateFunctions/HelpersMinMaxAny.h +++ b/src/AggregateFunctions/HelpersMinMaxAny.h @@ -8,10 +8,14 @@ #include #include #include - +#include namespace DB { +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} /// min, max, any, anyLast, anyHeavy, etc... template