From 8a00ce5ff1c413cfa22185795682ccf084cdb533 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 24 Jul 2019 15:56:39 +0300 Subject: [PATCH 01/59] Intermediate step --- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Parsers/ASTAlterQuery.cpp | 10 +++++ dbms/src/Parsers/ASTAlterQuery.h | 6 ++- dbms/src/Storages/AlterCommands.cpp | 42 +++++++++++++++---- dbms/src/Storages/AlterCommands.h | 16 ++++--- dbms/src/Storages/IStorage.cpp | 8 +++- dbms/src/Storages/IStorage.h | 5 ++- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 11 ++++- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 29 +++++++++++-- dbms/src/Storages/StorageMergeTree.h | 3 ++ dbms/src/Storages/StorageNull.cpp | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 6 ++- 16 files changed, 121 insertions(+), 26 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index c472a336d73..d0a5edab1d3 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -437,6 +437,7 @@ namespace ErrorCodes extern const int CANNOT_CREATE_TIMER = 460; extern const int CANNOT_SET_TIMER_PERIOD = 461; extern const int CANNOT_DELETE_TIMER = 462; + extern const int SETTINGS_ARE_NOT_SUPPORTED = 463; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index c7cd100b415..52aa95d65e5 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -45,6 +45,11 @@ ASTPtr ASTAlterCommand::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } + if (settings_changes) + { + res->settings_changes = settings_changes->clone(); + res->children.push_back(res->settings_changes); + } return res; } @@ -184,6 +189,11 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : ""); ttl->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::MODIFY_SETTING) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SETTING " << (settings.hilite ? hilite_none : ""); + settings_changes->formatImpl(settings, state, frame); + } else throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 2c4b3ddbaf1..96ec8d91255 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -28,6 +28,7 @@ public: COMMENT_COLUMN, MODIFY_ORDER_BY, MODIFY_TTL, + MODIFY_SETTING, ADD_INDEX, DROP_INDEX, @@ -69,7 +70,7 @@ public: /** The ADD INDEX query stores the name of the index following AFTER. * The DROP INDEX query stores the name for deletion. */ - ASTPtr index; + ASTPtr index; /** Used in DROP PARTITION and ATTACH PARTITION FROM queries. * The value or ID of the partition is stored here. 
@@ -88,6 +89,9 @@ public: /// For MODIFY TTL query ASTPtr ttl; + /// FOR MODIFY_SETTING + ASTPtr settings_changes; + bool detach = false; /// true for DETACH PARTITION bool part = false; /// true for ATTACH PART diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 7814f1a6ba0..4b733f86d22 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int UNKNOWN_SETTING; } @@ -172,13 +174,20 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.ttl = command_ast->ttl; return command; } + else if (command_ast->type == ASTAlterCommand::MODIFY_SETTING) + { + AlterCommand command; + command.type = AlterCommand::MODIFY_SETTING; + command.settings_changes = command_ast->settings_changes->as().changes; + return command; + } else return {}; } void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, - ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast, SettingsChanges & changes) const { if (type == ADD_COLUMN) { @@ -306,38 +315,43 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri { ttl_table_ast = ttl; } + else if (type == MODIFY_SETTING) + { + changes.insert(changes.begin(), settings_changes.begin(), settings_changes.end()); + } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } bool AlterCommand::isMutable() const { - if (type == COMMENT_COLUMN) + if (type == COMMENT_COLUMN || type == MODIFY_SETTING) return false; if (type == MODIFY_COLUMN) return data_type.get() || default_expression; - // TODO: возможно, здесь нужно дополнить return true; } void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, - ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast, SettingsChanges & changes) const { auto new_columns_description = columns_description; auto new_indices_description = indices_description; auto new_order_by_ast = order_by_ast; auto new_primary_key_ast = primary_key_ast; auto new_ttl_table_ast = ttl_table_ast; + auto new_changes = changes; for (const AlterCommand & command : *this) if (!command.ignore) - command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, changes); columns_description = std::move(new_columns_description); indices_description = std::move(new_indices_description); order_by_ast = std::move(new_order_by_ast); primary_key_ast = std::move(new_primary_key_ast); ttl_table_ast = std::move(new_ttl_table_ast); + changes = std::move(new_changes); } void AlterCommands::validate(const IStorage & table, const Context & context) @@ -453,6 +467,16 @@ void AlterCommands::validate(const IStorage & table, const Context & context) throw Exception{"Wrong column name. 
Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; } } + else if (command.type == AlterCommand::MODIFY_SETTING) + { + for (const auto & change : command.settings_changes) + { + if (!table.hasSetting(change.name)) + { + throw Exception{"Storage '" + table.getName() + "' doesn't have setting '" + change.name + "'", ErrorCodes::UNKNOWN_SETTING}; + } + } + } } /** Existing defaulted columns may require default expression extensions with a type conversion, @@ -518,14 +542,15 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } } -void AlterCommands::apply(ColumnsDescription & columns_description) const +void AlterCommands::applyForColumnsOnly(ColumnsDescription & columns_description) const { auto out_columns_description = columns_description; IndicesDescription indices_description; ASTPtr out_order_by; ASTPtr out_primary_key; ASTPtr out_ttl_table; - apply(out_columns_description, indices_description, out_order_by, out_primary_key, out_ttl_table); + SettingsChanges out_changes; + apply(out_columns_description, indices_description, out_order_by, out_primary_key, out_ttl_table, out_changes); if (out_order_by) throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED); @@ -535,6 +560,9 @@ void AlterCommands::apply(ColumnsDescription & columns_description) const throw Exception("Storage doesn't support modifying indices", ErrorCodes::NOT_IMPLEMENTED); if (out_ttl_table) throw Exception("Storage doesn't support modifying TTL expression", ErrorCodes::NOT_IMPLEMENTED); + if (!out_changes.empty()) + throw Exception("Storage doesn't support modifying settings", ErrorCodes::NOT_IMPLEMENTED); + columns_description = std::move(out_columns_description); } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 4905b80f92f..6daafe5cd7f 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -27,6 +28,7 @@ struct AlterCommand DROP_INDEX, MODIFY_TTL, UKNOWN_TYPE, + MODIFY_SETTING, }; Type type = UKNOWN_TYPE; @@ -71,6 +73,9 @@ struct AlterCommand /// For ADD and MODIFY CompressionCodecPtr codec; + /// For MODIFY SETTING + SettingsChanges settings_changes; + AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, @@ -84,7 +89,7 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command); void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, - ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const; + ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast, SettingsChanges & changes) const; /// Checks that not only metadata touched by that command bool isMutable() const; @@ -95,11 +100,12 @@ class Context; class AlterCommands : public std::vector { public: - void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast, - ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const; - /// For storages that don't support MODIFY_ORDER_BY. 
- void apply(ColumnsDescription & columns_description) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast, SettingsChanges & changes) const; + + /// Used for primitive table engines, where only columns metadata can be changed + void applyForColumnsOnly(ColumnsDescription & columns_description) const; void validate(const IStorage & table, const Context & context); bool isMutable() const; }; } diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 687ca970311..0fd1994f5ce 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int TYPE_MISMATCH; + extern const int SETTINGS_ARE_NOT_SUPPORTED; } IStorage::IStorage(ColumnsDescription columns_) @@ -292,6 +293,11 @@ bool IStorage::isVirtualColumn(const String & column_name) const return getColumns().get(column_name).is_virtual; } +bool IStorage::hasSetting(const String & /* setting_name */) const +{ + throw Exception("Storage '" + getName() + "' doesn't support settings.", ErrorCodes::SETTINGS_ARE_NOT_SUPPORTED); +} + TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) { TableStructureReadLockHolder result; @@ -362,7 +368,7 @@ void IStorage::alter( lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); auto new_columns = getColumns(); auto new_indices = getIndices(); - params.apply(new_columns); + params.applyForColumnsOnly(new_columns); context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 477c4456b17..f269dce220f 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,9 @@ public: /// thread-unsafe part. lockStructure must be acquired /// If |need_all| is set, then checks that all the columns of the table are in the block. void check(const Block & block, bool need_all = false) const; + /// Check storage has setting + virtual bool hasSetting(const String & setting_name) const; + protected: /// still thread-unsafe part. void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. void setIndices(IndicesDescription indices_); @@ -136,7 +140,6 @@ protected: /// still thread-unsafe part. /// Returns whether the column is virtual - by default all columns are real. /// Initially reserved virtual column name may be shadowed by real column.
virtual bool isVirtualColumn(const String & column_name) const; - private: ColumnsDescription columns; /// combined real and virtual columns const ColumnsDescription virtuals = {}; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b32470f9f77..25e4246a414 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -87,6 +87,7 @@ namespace ErrorCodes extern const int CANNOT_MUNMAP; extern const int CANNOT_MREMAP; extern const int BAD_TTL_EXPRESSION; + extern const int UNKNOWN_SETTING; } @@ -1174,7 +1175,8 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + SettingsChanges new_changes; + commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); if (getIndices().empty() && !new_indices.empty() && !context.getSettingsRef().allow_experimental_data_skipping_indices) @@ -1262,6 +1264,13 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast, /* only_check = */ true); + for (const auto & setting : new_changes) + { + if (!hasSetting(setting.name)) + throw Exception{"Storage '" + getName() + "' doesn't have setting '" + setting.name + "'", ErrorCodes::UNKNOWN_SETTING}; + + } + /// Check that type conversions are possible. ExpressionActionsPtr unused_expression; NameToNameMap unused_map; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 29962382749..33200500667 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -629,7 +629,7 @@ public: String sampling_expr_column_name; Names columns_required_for_sampling; - const MergeTreeSettings settings; + MergeTreeSettings settings; /// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 3c334b2a48b..6a857675018 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -701,7 +701,7 @@ void StorageBuffer::alter(const AlterCommands & params, const String & database_ auto new_columns = getColumns(); auto new_indices = getIndices(); - params.apply(new_columns); + params.applyForColumnsOnly(new_columns); context.getDatabase(database_name_)->alterTable(context, table_name_, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 27ceb1f45db..e8aed6d5702 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -350,7 +350,7 @@ void StorageDistributed::alter( auto new_columns = getColumns(); auto new_indices = getIndices(); - params.apply(new_columns); + params.applyForColumnsOnly(new_columns); context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 4c029fab677..a0454185307 100644 --- 
a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -401,7 +401,7 @@ void StorageMerge::alter( auto new_columns = getColumns(); auto new_indices = getIndices(); - params.apply(new_columns); + params.applyForColumnsOnly(new_columns); context.getDatabase(database_name_)->alterTable(context, table_name_, new_columns, new_indices, {}); setColumns(new_columns); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0536423101d..d8360cb61e4 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ namespace ErrorCodes extern const int INCORRECT_FILE_NAME; extern const int CANNOT_ASSIGN_OPTIMIZE; extern const int INCOMPATIBLE_COLUMNS; + extern const int UNKNOWN_SETTING; } namespace ActionLocks @@ -245,9 +247,21 @@ void StorageMergeTree::alter( lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); auto new_columns = getColumns(); auto new_indices = getIndices(); - params.apply(new_columns); + ASTPtr new_order_by_ast = order_by_ast; + ASTPtr new_primary_key_ast = primary_key_ast; + ASTPtr new_ttl_table_ast = ttl_table_ast; + SettingsChanges new_changes; + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); + IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) + { + auto & storage_ast = ast.as(); + if (!new_changes.empty()) + storage_ast.settings->changes.insert(storage_ast.settings->changes.begin(), new_changes.begin(), new_changes.end()); + }; + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); + settings.applyChanges(new_changes); return; } @@ -263,7 +277,8 @@ void StorageMergeTree::alter( ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + SettingsChanges new_changes; + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); auto transactions = prepareAlterTransactions(new_columns, new_indices, context); @@ -844,7 +859,9 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; ASTPtr ignored_ttl_table_ast; - alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast); + SettingsChanges ignore_settings_changes; + + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast, ignore_settings_changes); auto columns_for_parts = new_columns.getAllPhysical(); for (const auto & part : parts) @@ -1009,6 +1026,12 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons } + +bool StorageMergeTree::hasSetting(const String & setting_name) const +{ + return settings.findIndex(setting_name) != MergeTreeSettings::npos; +} + void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) { // TODO: should get some locks to prevent race with 'alter … modify column' diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0de9618d915..e2d24ceecdd 100644 --- 
a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -128,7 +128,10 @@ private: friend class MergeTreeData; friend struct CurrentlyMergingPartsTagger; + bool hasSetting(const String & setting_name) const override; + protected: + /** Attach the table with the appropriate name, along the appropriate path (with / at the end), * (correctness of names and paths are not checked) * consisting of the specified columns. diff --git a/dbms/src/Storages/StorageNull.cpp b/dbms/src/Storages/StorageNull.cpp index f6de6e87e37..f5b463ff972 100644 --- a/dbms/src/Storages/StorageNull.cpp +++ b/dbms/src/Storages/StorageNull.cpp @@ -38,7 +38,7 @@ void StorageNull::alter( ColumnsDescription new_columns = getColumns(); IndicesDescription new_indices = getIndices(); - params.apply(new_columns); + params.applyForColumnsOnly(new_columns); context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 934d651fead..87ab8815c57 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1510,7 +1510,8 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; ASTPtr ignored_ttl_table_ast; - alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast); + SettingsChanges ignored_changes; + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast, ignored_changes); size_t modified_parts = 0; auto parts = getDataParts(); @@ -3119,7 +3120,8 @@ void StorageReplicatedMergeTree::alter( ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast); + SettingsChanges new_changes; + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); String new_columns_str = new_columns.toString(); if (new_columns_str != getColumns().toString()) From f7e0d17490ddace60010895d0609e01ff9717dd9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 24 Jul 2019 18:22:16 +0300 Subject: [PATCH 02/59] Fix some bugs --- dbms/src/Parsers/ParserAlterQuery.cpp | 9 +++++++++ dbms/src/Parsers/ParserAlterQuery.h | 1 + dbms/src/Storages/AlterCommands.cpp | 4 ++-- dbms/src/Storages/StorageMergeTree.cpp | 4 ++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 98891bbdf5f..b0ff5bf6c8c 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_comment_column("COMMENT COLUMN"); ParserKeyword s_modify_order_by("MODIFY ORDER BY"); ParserKeyword s_modify_ttl("MODIFY TTL"); + ParserKeyword s_modify_setting("MODIFY SETTING"); ParserKeyword s_add_index("ADD INDEX"); ParserKeyword s_drop_index("DROP INDEX"); @@ -63,6 +65,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserList parser_assignment_list( 
std::make_unique(), std::make_unique(TokenType::Comma), /* allow_empty = */ false); + ParserSetQuery parser_settings(true); if (s_add_column.ignore(pos, expected)) { @@ -289,6 +292,12 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::MODIFY_TTL; } + else if (s_modify_setting.ignore(pos, expected)) + { + if (!parser_settings.parse(pos, command->settings_changes, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_SETTING; + } else return false; diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 282a4277e17..49d10eac34b 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -13,6 +13,7 @@ namespace DB * [CLEAR COLUMN [IF EXISTS] col_to_clear [IN PARTITION partition],] * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...] * [MODIFY PRIMARY KEY (a, b, c...)] + * [MODIFY SETTING setting_name=setting_value, ...] * [COMMENT COLUMN [IF EXISTS] col_name string] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 4b733f86d22..789b66b271a 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -317,7 +317,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri } else if (type == MODIFY_SETTING) { - changes.insert(changes.begin(), settings_changes.begin(), settings_changes.end()); + changes.insert(changes.end(), settings_changes.begin(), settings_changes.end()); } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); @@ -344,7 +344,7 @@ void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescr for (const AlterCommand & command : *this) if (!command.ignore) - command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, changes); + command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); columns_description = std::move(new_columns_description); indices_description = std::move(new_indices_description); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index d8360cb61e4..f39735b1595 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -256,10 +256,10 @@ void StorageMergeTree::alter( { auto & storage_ast = ast.as(); if (!new_changes.empty()) - storage_ast.settings->changes.insert(storage_ast.settings->changes.begin(), new_changes.begin(), new_changes.end()); + storage_ast.settings->changes.insert(storage_ast.settings->changes.end(), new_changes.begin(), new_changes.end()); }; - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier); setColumns(std::move(new_columns)); settings.applyChanges(new_changes); return; From 86831fe500c5dbf1124c7cecaf832f502700d5ba Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 29 Jul 2019 14:03:50 +0300 Subject: [PATCH 03/59] Add ability to create immutable settings --- .../performance-test/PerformanceTestInfo.cpp | 2 +- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Core/Settings.h | 2 +- dbms/src/Core/SettingsCommon.h | 78 
+++++++++++++++++-- dbms/src/Storages/Kafka/KafkaSettings.cpp | 2 +- dbms/src/Storages/Kafka/KafkaSettings.h | 22 +++--- .../Storages/MergeTree/MergeTreeSettings.cpp | 2 +- .../Storages/MergeTree/MergeTreeSettings.h | 4 +- dbms/src/Storages/StorageMergeTree.cpp | 2 +- 9 files changed, 89 insertions(+), 26 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index f016b257c5f..aa8c05f2d44 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -96,7 +96,7 @@ void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) } extractSettings(config, "settings", config_settings, settings_to_apply); - settings.applyChanges(settings_to_apply); + settings.loadFromChanges(settings_to_apply); if (settings_contain("average_rows_speed_precision")) TestStats::avg_rows_speed_precision = diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 35213e3064f..7b955e47938 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -439,6 +439,7 @@ namespace ErrorCodes extern const int CANNOT_DELETE_TIMER = 462; extern const int CANNOT_FCNTL = 463; extern const int SETTINGS_ARE_NOT_SUPPORTED = 464; + extern const int IMMUTABLE_SETTING = 465; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 60f2599c73f..7501e4324a7 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -42,7 +42,7 @@ struct Settings : public SettingsCollection * but we are not going to do it, because settings is used everywhere as static struct fields. */ -#define LIST_OF_SETTINGS(M) \ +#define LIST_OF_SETTINGS(M, IM) \ M(SettingUInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.") \ M(SettingUInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.") \ M(SettingUInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading") \ diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 08064096133..e9dce9c9dcc 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -17,6 +17,10 @@ class Field; class ReadBuffer; class WriteBuffer; +namespace ErrorCodes +{ + extern const int IMMUTABLE_SETTING; +} /** One setting for any type. * Stores a value within itself, as well as a flag - whether the value was changed. 
@@ -317,6 +321,7 @@ private: size_t offset_of_changed; StringRef name; StringRef description; + bool immutable; GetStringFunction get_string; GetFieldFunction get_field; SetStringFunction set_string; @@ -396,6 +401,7 @@ public: const_reference(const const_reference & src) = default; const StringRef & getName() const { return member->name; } const StringRef & getDescription() const { return member->description; } + bool isImmutable() const { return member->immutable; } bool isChanged() const { return member->isChanged(*collection); } Field getValue() const { return member->get_field(*collection); } String getValueAsString() const { return member->get_string(*collection); } @@ -497,6 +503,40 @@ public: void set(size_t index, const String & value) { (*this)[index].setValue(value); } void set(const String & name, const String & value) { (*this)[name].setValue(value); } + /// Updates setting's value. + void update(size_t index, const Field & value) + { + auto ref = (*this)[index]; + if (ref.isImmutable()) + throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); + ref.setValue(value); + } + + void update(const String & name, const Field & value) + { + auto ref = (*this)[name]; + if (ref.isImmutable()) + throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); + ref.setValue(value); + } + + /// Updates setting's value. Read value in text form from string (for example, from configuration file or from URL parameter). + void update(size_t index, const String & value) + { + auto ref = (*this)[index]; + if (ref.isImmutable()) + throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); + (*this)[index].setValue(value); + } + + void update(const String & name, const String & value) + { + auto ref = (*this)[name]; + if (ref.isImmutable()) + throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); + (*this)[name].setValue(value); + } + /// Returns value of a setting. Field get(size_t index) const { return (*this)[index].getValue(); } Field get(const String & name) const { return (*this)[name].getValue(); } @@ -561,17 +601,30 @@ public: } /// Applies changes to the settings. - void applyChange(const SettingChange & change) + void loadFromChange(const SettingChange & change) { set(change.name, change.value); } - void applyChanges(const SettingsChanges & changes) + void loadFromChanges(const SettingsChanges & changes) { for (const SettingChange & change : changes) - applyChange(change); + loadFromChange(change); } + /// Applies changes to the settings. 
+ void updateFromChange(const SettingChange & change) + { + update(change.name, change.value); + } + + void updateFromChanges(const SettingsChanges & changes) + { + for (const SettingChange & change : changes) + updateFromChange(change); + } + + void copyChangesFrom(const Derived & src) { for (const auto & member : members()) @@ -615,7 +668,7 @@ public: }; #define DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS_MACRO) \ - LIST_OF_SETTINGS_MACRO(DECLARE_SETTINGS_COLLECTION_DECLARE_VARIABLES_HELPER_) + LIST_OF_SETTINGS_MACRO(DECLARE_SETTINGS_COLLECTION_DECLARE_VARIABLES_HELPER_, DECLARE_SETTINGS_COLLECTION_DECLARE_VARIABLES_HELPER_) #define IMPLEMENT_SETTINGS_COLLECTION(DERIVED_CLASS_NAME, LIST_OF_SETTINGS_MACRO) \ @@ -625,9 +678,9 @@ public: using Derived = DERIVED_CLASS_NAME; \ struct Functions \ { \ - LIST_OF_SETTINGS_MACRO(IMPLEMENT_SETTINGS_COLLECTION_DEFINE_FUNCTIONS_HELPER_) \ + LIST_OF_SETTINGS_MACRO(IMPLEMENT_SETTINGS_COLLECTION_DEFINE_FUNCTIONS_HELPER_, IMPLEMENT_SETTINGS_COLLECTION_DEFINE_FUNCTIONS_HELPER_) \ }; \ - LIST_OF_SETTINGS_MACRO(IMPLEMENT_SETTINGS_COLLECTION_ADD_MEMBER_INFO_HELPER_) \ + LIST_OF_SETTINGS_MACRO(IMPLEMENT_SETTINGS_COLLECTION_ADD_MUTABLE_MEMBER_INFO_HELPER_, IMPLEMENT_SETTINGS_COLLECTION_ADD_IMMUTABLE_MEMBER_INFO_HELPER_) \ } @@ -645,13 +698,22 @@ public: static Field NAME##_castValueWithoutApplying(const Field & value) { TYPE temp{DEFAULT}; temp.set(value); return temp.toField(); } -#define IMPLEMENT_SETTINGS_COLLECTION_ADD_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ +#define IMPLEMENT_SETTINGS_COLLECTION_ADD_MUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ static_assert(std::is_same_v().NAME.changed), bool>); \ add({offsetof(Derived, NAME.changed), \ - StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), \ + StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), false, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ &Functions::NAME##_serialize, &Functions::NAME##_deserialize, \ &Functions::NAME##_castValueWithoutApplying }); +#define IMPLEMENT_SETTINGS_COLLECTION_ADD_IMMUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ + static_assert(std::is_same_v().NAME.changed), bool>); \ + add({offsetof(Derived, NAME.changed), \ + StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), true, \ + &Functions::NAME##_getString, &Functions::NAME##_getField, \ + &Functions::NAME##_setString, &Functions::NAME##_setField, \ + &Functions::NAME##_serialize, &Functions::NAME##_deserialize, \ + &Functions::NAME##_castValueWithoutApplying }); + } diff --git a/dbms/src/Storages/Kafka/KafkaSettings.cpp b/dbms/src/Storages/Kafka/KafkaSettings.cpp index d08282a9794..b08d45780bb 100644 --- a/dbms/src/Storages/Kafka/KafkaSettings.cpp +++ b/dbms/src/Storages/Kafka/KafkaSettings.cpp @@ -22,7 +22,7 @@ void KafkaSettings::loadFromQuery(ASTStorage & storage_def) { try { - applyChanges(storage_def.settings->changes); + loadFromChanges(storage_def.settings->changes); } catch (Exception & e) { diff --git a/dbms/src/Storages/Kafka/KafkaSettings.h b/dbms/src/Storages/Kafka/KafkaSettings.h index f3642d66803..366ec715bf0 100644 --- a/dbms/src/Storages/Kafka/KafkaSettings.h +++ b/dbms/src/Storages/Kafka/KafkaSettings.h @@ -14,17 +14,17 @@ class ASTStorage; struct KafkaSettings : public SettingsCollection { -#define LIST_OF_KAFKA_SETTINGS(M) \ - M(SettingString, kafka_broker_list, "", "A comma-separated list of 
brokers for Kafka engine.") \ - M(SettingString, kafka_topic_list, "", "A list of Kafka topics.") \ - M(SettingString, kafka_group_name, "", "A group of Kafka consumers.") \ - M(SettingString, kafka_format, "", "The message format for Kafka engine.") \ - M(SettingChar, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.") \ - M(SettingString, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine") \ - M(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.") \ - M(SettingUInt64, kafka_max_block_size, 0, "The maximum block size per table for Kafka engine.") \ - M(SettingUInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block") \ - M(SettingUInt64, kafka_commit_every_batch, 0, "Commit every consumed and handled batch instead of a single commit after writing a whole block") +#define LIST_OF_KAFKA_SETTINGS(M, IM) \ + IM(SettingString, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.") \ + IM(SettingString, kafka_topic_list, "", "A list of Kafka topics.") \ + IM(SettingString, kafka_group_name, "", "A group of Kafka consumers.") \ + IM(SettingString, kafka_format, "", "The message format for Kafka engine.") \ + IM(SettingChar, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.") \ + IM(SettingString, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine") \ + IM(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.") \ + IM(SettingUInt64, kafka_max_block_size, 0, "The maximum block size per table for Kafka engine.") \ + IM(SettingUInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block") \ + IM(SettingUInt64, kafka_commit_every_batch, 0, "Commit every consumed and handled batch instead of a single commit after writing a whole block") DECLARE_SETTINGS_COLLECTION(LIST_OF_KAFKA_SETTINGS) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index e3600b6ac4a..562f2d8f402 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -46,7 +46,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) { try { - applyChanges(storage_def.settings->changes); + loadFromChanges(storage_def.settings->changes); } catch (Exception & e) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 9bd58e77f9c..c982bef324f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -24,8 +24,8 @@ class ASTStorage; struct MergeTreeSettings : public SettingsCollection { -#define LIST_OF_MERGE_TREE_SETTINGS(M) \ - M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ +#define LIST_OF_MERGE_TREE_SETTINGS(M, IM) \ + IM(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ \ /** Merge settings. 
*/ \ M(SettingUInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).") \ diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index f39735b1595..41e2b04895a 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -261,7 +261,7 @@ void StorageMergeTree::alter( context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier); setColumns(std::move(new_columns)); - settings.applyChanges(new_changes); + settings.updateFromChanges(new_changes); return; } From e12cb8da6df1efd5b21d4e4cd1207ed8e2a55185 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Aug 2019 15:52:08 +0300 Subject: [PATCH 04/59] Add some changes --- dbms/src/Core/SettingsCommon.h | 60 +++++++----------- dbms/src/Storages/AlterCommands.cpp | 34 ++++++++++ dbms/src/Storages/AlterCommands.h | 7 +++ .../00980_merge_alter_settings.reference | 2 + .../00980_merge_alter_settings.sql | 39 ++++++++++++ ...keeper_merge_tree_alter_settings.reference | 10 +++ ...80_zookeeper_merge_tree_alter_settings.sql | 62 +++++++++++++++++++ 7 files changed, 177 insertions(+), 37 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference create mode 100644 dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql create mode 100644 dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference create mode 100644 dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index e9dce9c9dcc..618fc00aa75 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -321,7 +321,7 @@ private: size_t offset_of_changed; StringRef name; StringRef description; - bool immutable; + bool updateable; GetStringFunction get_string; GetFieldFunction get_field; SetStringFunction set_string; @@ -401,7 +401,7 @@ public: const_reference(const const_reference & src) = default; const StringRef & getName() const { return member->name; } const StringRef & getDescription() const { return member->description; } - bool isImmutable() const { return member->immutable; } + bool isUpdateable() const { return member->updateable; } bool isChanged() const { return member->isChanged(*collection); } Field getValue() const { return member->get_field(*collection); } String getValueAsString() const { return member->get_string(*collection); } @@ -421,6 +421,18 @@ public: reference(const const_reference & src) : const_reference(src) {} void setValue(const Field & value) { this->member->set_field(*const_cast(this->collection), value); } void setValue(const String & value) { this->member->set_string(*const_cast(this->collection), value); } + void updateValue(const Field & value) + { + if (!this->member->updateable) + throw Exception("Setting '" + this->member->name.toString() + "' is restricted for updates.", ErrorCodes::IMMUTABLE_SETTING); + setValue(value); + } + void updateValue(const String & value) + { + if (!this->member->updateable) + throw Exception("Setting '" + this->member->name.toString() + "' is restricted for updates.", ErrorCodes::IMMUTABLE_SETTING); + setValue(value); + } }; /// Iterator to iterating through all the settings. 
@@ -503,39 +515,14 @@ public: void set(size_t index, const String & value) { (*this)[index].setValue(value); } void set(const String & name, const String & value) { (*this)[name].setValue(value); } - /// Updates setting's value. - void update(size_t index, const Field & value) - { - auto ref = (*this)[index]; - if (ref.isImmutable()) - throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); - ref.setValue(value); - } + /// Updates setting's value. Checks it' mutability. + void update(size_t index, const Field & value) { (*this)[index].updateValue(value); } - void update(const String & name, const Field & value) - { - auto ref = (*this)[name]; - if (ref.isImmutable()) - throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); - ref.setValue(value); - } + void update(const String & name, const Field & value) { (*this)[name].updateValue(value); } - /// Updates setting's value. Read value in text form from string (for example, from configuration file or from URL parameter). - void update(size_t index, const String & value) - { - auto ref = (*this)[index]; - if (ref.isImmutable()) - throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); - (*this)[index].setValue(value); - } + void update(size_t index, const String & value) { (*this)[index].updateValue(value); } - void update(const String & name, const String & value) - { - auto ref = (*this)[name]; - if (ref.isImmutable()) - throw Exception("Cannot modify immutable setting '" + ref.getName().toString() + "'", ErrorCodes::IMMUTABLE_SETTING); - (*this)[name].setValue(value); - } + void update(const String & name, const String & value) { (*this)[name].updateValue(value); } /// Returns value of a setting. Field get(size_t index) const { return (*this)[index].getValue(); } @@ -600,7 +587,7 @@ public: return found_changes; } - /// Applies changes to the settings. + /// Applies changes to the settings. Doesn't check settings mutability. void loadFromChange(const SettingChange & change) { set(change.name, change.value); @@ -612,7 +599,7 @@ public: loadFromChange(change); } - /// Applies changes to the settings. 
+ /// Applies changes to the settings, checks settings mutability void updateFromChange(const SettingChange & change) { update(change.name, change.value); @@ -701,7 +688,7 @@ public: #define IMPLEMENT_SETTINGS_COLLECTION_ADD_MUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ static_assert(std::is_same_v().NAME.changed), bool>); \ add({offsetof(Derived, NAME.changed), \ - StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), false, \ + StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), true, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ &Functions::NAME##_serialize, &Functions::NAME##_deserialize, \ @@ -710,10 +697,9 @@ public: #define IMPLEMENT_SETTINGS_COLLECTION_ADD_IMMUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ static_assert(std::is_same_v().NAME.changed), bool>); \ add({offsetof(Derived, NAME.changed), \ - StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), true, \ + StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), false, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ &Functions::NAME##_serialize, &Functions::NAME##_deserialize, \ &Functions::NAME##_castValueWithoutApplying }); - } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 789b66b271a..16d6f0a0aa0 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -332,6 +332,11 @@ bool AlterCommand::isMutable() const return true; } +bool AlterCommand::isSettingsAlter() const +{ + return type == MODIFY_SETTING; +} + void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast, SettingsChanges & changes) const { @@ -567,6 +572,31 @@ void AlterCommands::applyForColumnsOnly(ColumnsDescription & columns_description columns_description = std::move(out_columns_description); } + +void AlterCommands::applyForSettingsOnly(SettingsChanges & changes) const +{ + ColumnsDescription out_columns_description; + IndicesDescription indices_description; + ASTPtr out_order_by; + ASTPtr out_primary_key; + ASTPtr out_ttl_table; + SettingsChanges out_changes; + apply(out_columns_description, indices_description, out_order_by, out_primary_key, out_ttl_table, out_changes); + + if (out_columns_description.begin() != out_columns_description.end()) + throw Exception("Alter modifying columns, but only settings change applied.", ErrorCodes::LOGICAL_ERROR); + if (out_order_by) + throw Exception("Alter modifying ORDER BY expression, but only settings change applied.", ErrorCodes::LOGICAL_ERROR); + if (out_primary_key) + throw Exception("Alter modifying PRIMARY KEY expression, but only settings change applied.", ErrorCodes::LOGICAL_ERROR); + if (!indices_description.indices.empty()) + throw Exception("Alter modifying indices, but only settings change applied.", ErrorCodes::NOT_IMPLEMENTED); + if (out_ttl_table) + throw Exception("Alter modifying TTL, but only settings change applied.", ErrorCodes::NOT_IMPLEMENTED); + + changes = std::move(out_changes); +} + bool AlterCommands::isMutable() const { for (const auto & param : *this) @@ -578,4 +608,8 @@ bool AlterCommands::isMutable() const return false; } +bool AlterCommands::isSettingsAlter() const +{ + return std::all_of(begin(), 
end(), [](const AlterCommand & c) { return c.isSettingsAlter(); }); +} } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index fe05d014cba..9b59514020f 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -93,6 +93,9 @@ struct AlterCommand /// Checks that not only metadata touched by that command bool isMutable() const; + + /// checks that only settings changed by alter + bool isSettingsAlter() const; }; class Context; @@ -107,8 +110,12 @@ public: /// Used for primitive table engines, where only columns metadata can be changed void applyForColumnsOnly(ColumnsDescription & columns_description) const; + /// Apply alter commands only for settings. Exception will be thrown if any other part of table structure will be modified. + void applyForSettingsOnly(SettingsChanges & changes) const; + void validate(const IStorage & table, const Context & context); bool isMutable() const; + bool isSettingsAlter() const; }; } diff --git a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference new file mode 100644 index 00000000000..ad64346f90d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference @@ -0,0 +1,2 @@ +CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096 +CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 diff --git a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql new file mode 100644 index 00000000000..52f5e4dd444 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS log_for_alter; + +CREATE TABLE log_for_alter ( + id UInt64, + Data String +) ENGINE = Log(); + +ALTER TABLE log_for_alter MODIFY SETTING aaa=123; -- { serverError 468 } + +DROP TABLE IF EXISTS log_for_alter; + +DROP TABLE IF EXISTS table_for_alter; + +CREATE TABLE table_for_alter ( + id UInt64, + Data String +) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity=4096; + +ALTER TABLE table_for_alter MODIFY SETTING index_granularity=555; -- { serverError 469 } + +SHOW CREATE TABLE table_for_alter; + +ALTER TABLE table_for_alter MODIFY SETTING parts_to_throw_insert = 1, parts_to_delay_insert = 1; + +SHOW CREATE TABLE table_for_alter; + +INSERT INTO table_for_alter VALUES (1, '1'); +INSERT INTO table_for_alter VALUES (2, '2'); -- { serverError 252 } + +DETACH TABLE table_for_alter; + +ATTACH TABLE table_for_alter; + +INSERT INTO table_for_alter VALUES (2, '2'); -- { serverError 252 } + +ALTER TABLE table_for_alter MODIFY SETTING xxx_yyy=124; -- { serverError 115 } + +DROP TABLE IF EXISTS table_for_alter; + diff --git a/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference b/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference new file mode 100644 index 00000000000..e55bfadd538 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference @@ -0,0 +1,10 @@ +CREATE TABLE default.replicated_table_for_alter1 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192 +CREATE TABLE 
default.replicated_table_for_alter1 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192 +4 +4 +4 +4 +6 +6 +CREATE TABLE default.replicated_table_for_alter1 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1 +CREATE TABLE default.replicated_table_for_alter2 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\') ORDER BY id SETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 diff --git a/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql b/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql new file mode 100644 index 00000000000..9c7a7c33329 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql @@ -0,0 +1,62 @@ +DROP TABLE IF EXISTS replicated_table_for_alter1; +DROP TABLE IF EXISTS replicated_table_for_alter2; + +CREATE TABLE replicated_table_for_alter1 ( + id UInt64, + Data String +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_table_for_alter', '1') ORDER BY id; + +CREATE TABLE replicated_table_for_alter2 ( + id UInt64, + Data String +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_table_for_alter', '2') ORDER BY id; + +SHOW CREATE TABLE replicated_table_for_alter1; + +ALTER TABLE replicated_table_for_alter1 MODIFY SETTING index_granularity = 4096; -- { serverError 469 } + +SHOW CREATE TABLE replicated_table_for_alter1; + +INSERT INTO replicated_table_for_alter2 VALUES (1, '1'), (2, '2'); + +SYSTEM SYNC REPLICA replicated_table_for_alter1; + +ALTER TABLE replicated_table_for_alter1 MODIFY SETTING use_minimalistic_part_header_in_zookeeper = 1; + +INSERT INTO replicated_table_for_alter1 VALUES (3, '3'), (4, '4'); + +SYSTEM SYNC REPLICA replicated_table_for_alter2; + +SELECT COUNT() FROM replicated_table_for_alter1; +SELECT COUNT() FROM replicated_table_for_alter2; + +DETACH TABLE replicated_table_for_alter2; +ATTACH TABLE replicated_table_for_alter2; + +DETACH TABLE replicated_table_for_alter1; +ATTACH TABLE replicated_table_for_alter1; + +SELECT COUNT() FROM replicated_table_for_alter1; +SELECT COUNT() FROM replicated_table_for_alter2; + +ALTER TABLE replicated_table_for_alter2 MODIFY SETTING parts_to_throw_insert = 1, parts_to_delay_insert = 1; +INSERT INTO replicated_table_for_alter2 VALUES (3, '1'), (4, '2'); -- { serverError 252 } + +INSERT INTO replicated_table_for_alter1 VALUES (5, '5'), (6, '6'); + +SYSTEM SYNC REPLICA replicated_table_for_alter2; + +SELECT COUNT() FROM replicated_table_for_alter1; +SELECT COUNT() FROM replicated_table_for_alter2; + +DETACH TABLE replicated_table_for_alter2; +ATTACH TABLE replicated_table_for_alter2; + +DETACH TABLE replicated_table_for_alter1; +ATTACH TABLE replicated_table_for_alter1; + +SHOW CREATE TABLE replicated_table_for_alter1; +SHOW CREATE TABLE replicated_table_for_alter2; + +DROP TABLE IF EXISTS replicated_table_for_alter2; +DROP TABLE IF EXISTS replicated_table_for_alter1; From 84fd4906cac21b2b63ac89242fba707b4d59eefb Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Aug 2019 16:04:29 +0300 Subject: [PATCH 05/59] Changes for engines --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 24 +++++++++++++++++++ dbms/src/Storages/MergeTree/MergeTreeData.h | 8 
+++++++ dbms/src/Storages/StorageMergeTree.cpp | 15 ++++++------ dbms/src/Storages/StorageMergeTree.h | 2 -- .../Storages/StorageReplicatedMergeTree.cpp | 12 +++++++++- 5 files changed, 51 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a99b6fbabeb..70c6c409c30 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -1583,6 +1584,29 @@ void MergeTreeData::alterDataPart( return; } +void MergeTreeData::alterSettings( + const SettingsChanges & new_changes, + const String & current_database_name, + const String & current_table_name, + const Context & context) +{ + settings.updateFromChanges(new_changes); + IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) + { + if (!new_changes.empty()) + { + auto & storage_ast = ast.as(); + storage_ast.settings->changes.insert(storage_ast.settings->changes.end(), new_changes.begin(), new_changes.end()); + } + }; + context.getDatabase(current_database_name)->alterTable(context, current_table_name, getColumns(), getIndices(), storage_modifier); +} + +bool MergeTreeData::hasSetting(const String & setting_name) const +{ + return settings.findIndex(setting_name) != MergeTreeSettings::npos; +} + void MergeTreeData::removeEmptyColumnsFromPart(MergeTreeData::MutableDataPartPtr & data_part) { auto & empty_columns = data_part->empty_columns; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index cd95e5c7e58..d11074e1156 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -509,6 +509,14 @@ public: bool skip_sanity_checks, AlterDataPartTransactionPtr& transaction); + void alterSettings( + const SettingsChanges & new_changes, + const String & current_database_name, + const String & current_table_name, + const Context & context); + + bool hasSetting(const String & setting_name) const override; + /// Remove columns, that have been markedd as empty after zeroing values with expired ttl void removeEmptyColumnsFromPart(MergeTreeData::MutableDataPartPtr & data_part); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index e823136e3bb..438b9468b69 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -246,12 +246,19 @@ void StorageMergeTree::alter( if (!params.isMutable()) { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + SettingsChanges new_changes; + if (params.isSettingsAlter()) + { + params.applyForSettingsOnly(new_changes); + alterSettings(new_changes, current_database_name, current_table_name, context); + return; + } + auto new_columns = getColumns(); auto new_indices = getIndices(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; - SettingsChanges new_changes; params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) { @@ -1031,12 +1038,6 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons } - -bool StorageMergeTree::hasSetting(const String & setting_name) const -{ - return settings.findIndex(setting_name) != MergeTreeSettings::npos; -} - void StorageMergeTree::attachPartition(const 
ASTPtr & partition, bool attach_part, const Context & context) { // TODO: should get some locks to prevent race with 'alter … modify column' diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index e2d24ceecdd..0df90604f67 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -128,8 +128,6 @@ private: friend class MergeTreeData; friend struct CurrentlyMergingPartsTagger; - bool hasSetting(const String & setting_name) const override; - protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 365f7e70143..23c49b05c1a 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3072,13 +3072,23 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p void StorageReplicatedMergeTree::alter( - const AlterCommands & params, const String & /*database_name*/, const String & /*table_name*/, + const AlterCommands & params, const String & current_database_name, const String & current_table_name, const Context & query_context, TableStructureWriteLockHolder & table_lock_holder) { assertNotReadonly(); LOG_DEBUG(log, "Doing ALTER"); + if (params.isSettingsAlter()) + { + LOG_DEBUG(log, "ALTER settings only"); + lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId()); + SettingsChanges new_changes; + params.applyForSettingsOnly(new_changes); + alterSettings(new_changes, current_database_name, current_table_name, query_context); + return; + } + /// Alter is done by modifying the metadata nodes in ZK that are shared between all replicas /// (/columns, /metadata). We set contents of the shared nodes to the new values and wait while /// replicas asynchronously apply changes (see ReplicatedMergeTreeAlterThread.cpp) and modify From 303c4e5a58347969da67efccfc7890e33619176d Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Aug 2019 16:06:09 +0300 Subject: [PATCH 06/59] Make index_granularity_bytes immutable --- dbms/src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index c982bef324f..3012b9e1694 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -77,7 +77,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingBool, use_minimalistic_part_header_in_zookeeper, false, "Store part header (checksums and columns) in a compact format and a single part znode instead of separate znodes (/columns and /checksums). This can dramatically reduce snapshot size in ZooKeeper. Before enabling check that all replicas support new format.") \ M(SettingUInt64, finished_mutations_to_keep, 100, "How many records about mutations that are done to keep. 
If zero, then keep all of them.") \ M(SettingUInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).") \ - M(SettingUInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).") \ + IM(SettingUInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).") \ M(SettingInt64, merge_with_ttl_timeout, 3600 * 24, "Minimal time in seconds, when merge with TTL can be repeated.") \ M(SettingBool, write_final_mark, 1, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)") \ M(SettingBool, enable_mixed_granularity_parts, 0, "Enable parts with adaptive and non adaptive granularity") From ca29343f546122340b934ae3b340770e35011e3d Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Aug 2019 17:09:36 +0300 Subject: [PATCH 07/59] Uniq settings --- dbms/src/Core/SettingsCommon.h | 2 +- dbms/src/Storages/Kafka/KafkaSettings.h | 20 +++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 12 +++++++++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + .../Storages/MergeTree/MergeTreeSettings.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 10 +--------- .../00980_merge_alter_settings.reference | 3 +++ .../00980_merge_alter_settings.sql | 12 +++++++++++ 8 files changed, 39 insertions(+), 23 deletions(-) diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 618fc00aa75..c079b875b9d 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -599,7 +599,7 @@ public: loadFromChange(change); } - /// Applies changes to the settings, checks settings mutability + /// Applies changes to the settings, checks settings mutability. 
void updateFromChange(const SettingChange & change) { update(change.name, change.value); diff --git a/dbms/src/Storages/Kafka/KafkaSettings.h b/dbms/src/Storages/Kafka/KafkaSettings.h index 366ec715bf0..fd8d9b3dc08 100644 --- a/dbms/src/Storages/Kafka/KafkaSettings.h +++ b/dbms/src/Storages/Kafka/KafkaSettings.h @@ -15,16 +15,16 @@ struct KafkaSettings : public SettingsCollection { #define LIST_OF_KAFKA_SETTINGS(M, IM) \ - IM(SettingString, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.") \ - IM(SettingString, kafka_topic_list, "", "A list of Kafka topics.") \ - IM(SettingString, kafka_group_name, "", "A group of Kafka consumers.") \ - IM(SettingString, kafka_format, "", "The message format for Kafka engine.") \ - IM(SettingChar, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.") \ - IM(SettingString, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine") \ - IM(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.") \ - IM(SettingUInt64, kafka_max_block_size, 0, "The maximum block size per table for Kafka engine.") \ - IM(SettingUInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block") \ - IM(SettingUInt64, kafka_commit_every_batch, 0, "Commit every consumed and handled batch instead of a single commit after writing a whole block") + M(SettingString, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.") \ + M(SettingString, kafka_topic_list, "", "A list of Kafka topics.") \ + M(SettingString, kafka_group_name, "", "A group of Kafka consumers.") \ + M(SettingString, kafka_format, "", "The message format for Kafka engine.") \ + M(SettingChar, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.") \ + M(SettingString, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine") \ + M(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.") \ + M(SettingUInt64, kafka_max_block_size, 0, "The maximum block size per table for Kafka engine.") \ + M(SettingUInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block") \ + M(SettingUInt64, kafka_commit_every_batch, 0, "Commit every consumed and handled batch instead of a single commit after writing a whole block") DECLARE_SETTINGS_COLLECTION(LIST_OF_KAFKA_SETTINGS) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 70c6c409c30..7c65b33d830 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1595,8 +1595,16 @@ void MergeTreeData::alterSettings( { if (!new_changes.empty()) { - auto & storage_ast = ast.as(); - storage_ast.settings->changes.insert(storage_ast.settings->changes.end(), new_changes.begin(), new_changes.end()); + auto & storage_changes = ast.as().settings->changes; + /// Make storage settings unique + for (const auto & change : new_changes) + { + auto finder = [&change] (const SettingChange & c) { return c.name == change.name;}; + if (auto it = std::find_if(storage_changes.begin(), storage_changes.end(), finder); it != storage_changes.end()) + it->value = change.value; + else + storage_changes.push_back(change); + } } }; context.getDatabase(current_database_name)->alterTable(context, current_table_name, getColumns(), getIndices(), storage_modifier); 
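For reference, the deduplication applied by the storage_modifier lambda above boils down to the following standalone sketch. SettingChange here is a simplified stand-in for the real DB::SettingChange (whose value is a Field), so this is only an illustration of the merge rule, not the actual implementation:

    #include <algorithm>
    #include <string>
    #include <vector>

    /// Illustrative stand-ins for DB::SettingChange / DB::SettingsChanges.
    struct SettingChange { std::string name; std::string value; };
    using SettingsChanges = std::vector<SettingChange>;

    /// Merge freshly altered settings into the changes already stored in the table's
    /// ASTStorage: overwrite the value when the setting is already listed, append
    /// otherwise, so the persisted SETTINGS clause never repeats a setting name.
    void mergeStorageChanges(SettingsChanges & storage_changes, const SettingsChanges & new_changes)
    {
        for (const auto & change : new_changes)
        {
            auto same_name = [&](const SettingChange & c) { return c.name == change.name; };
            if (auto it = std::find_if(storage_changes.begin(), storage_changes.end(), same_name);
                it != storage_changes.end())
                it->value = change.value;
            else
                storage_changes.push_back(change);
        }
    }
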
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d11074e1156..f681baa4488 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -509,6 +509,7 @@ public: bool skip_sanity_checks, AlterDataPartTransactionPtr& transaction); + /// Performs ALTER of table settings void alterSettings( const SettingsChanges & new_changes, const String & current_database_name, diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index 562f2d8f402..9eee33554ab 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -67,7 +67,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) #define ADD_IF_ABSENT(NAME) \ if (std::find_if(changes.begin(), changes.end(), \ - [](const SettingChange & c) { return c.name == #NAME; }) \ + [](const SettingChange & c) { return c.name == #NAME; }) \ == changes.end()) \ changes.push_back(SettingChange{#NAME, NAME.value}); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 438b9468b69..9fe1458f5a7 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -260,16 +260,8 @@ void StorageMergeTree::alter( ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); - IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) - { - auto & storage_ast = ast.as(); - if (!new_changes.empty()) - storage_ast.settings->changes.insert(storage_ast.settings->changes.end(), new_changes.begin(), new_changes.end()); - }; - - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); - settings.updateFromChanges(new_changes); return; } diff --git a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference index ad64346f90d..c7f912ddc79 100644 --- a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference +++ b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.reference @@ -1,2 +1,5 @@ CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096 CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100 +2 +CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 30 diff --git a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql index 52f5e4dd444..cb1ba315001 100644 --- a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql +++ b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql @@ 
-35,5 +35,17 @@ INSERT INTO table_for_alter VALUES (2, '2'); -- { serverError 252 } ALTER TABLE table_for_alter MODIFY SETTING xxx_yyy=124; -- { serverError 115 } +ALTER TABLE table_for_alter MODIFY SETTING parts_to_throw_insert = 100, parts_to_delay_insert = 100; + +INSERT INTO table_for_alter VALUES (2, '2'); + +SHOW CREATE TABLE table_for_alter; + +SELECT COUNT() FROM table_for_alter; + +ALTER TABLE table_for_alter MODIFY SETTING check_delay_period=10, check_delay_period=20, check_delay_period=30; + +SHOW CREATE TABLE table_for_alter; + DROP TABLE IF EXISTS table_for_alter; From e62101b8e86665956e605a934e7981b3e7550a63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Aug 2019 19:29:31 +0300 Subject: [PATCH 08/59] Better comment --- dbms/src/Core/SettingsCommon.h | 8 ++++++-- dbms/src/Storages/IStorage.h | 3 +-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 7 +++++-- dbms/src/Storages/StorageMergeTree.cpp | 5 +++-- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 5 +++-- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index c079b875b9d..4b3f7020a6e 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -587,24 +587,28 @@ public: return found_changes; } - /// Applies changes to the settings. Doesn't check settings mutability. + /// Applies change to the settings. Doesn't check settings mutability. void loadFromChange(const SettingChange & change) { set(change.name, change.value); } + /// Applies changes to the settings. Should be used in initial settings loading. + /// (on table creation or loading from config) void loadFromChanges(const SettingsChanges & changes) { for (const SettingChange & change : changes) loadFromChange(change); } - /// Applies changes to the settings, checks settings mutability. + /// Applies change to the settings, checks settings mutability. void updateFromChange(const SettingChange & change) { update(change.name, change.value); } + /// Applies changes to the settings. Should be used for settigns update. + /// (ALTER MODIFY SETTINGS) void updateFromChanges(const SettingsChanges & changes) { for (const SettingChange & change : changes) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 4babb7ded3e..79760bae5a8 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -130,7 +129,7 @@ public: /// thread-unsafe part. lockStructure must be acquired /// If |need_all| is set, then checks that all the columns of the table are in the block. void check(const Block & block, bool need_all = false) const; - /// Check storage has setting + /// Check storage has setting. Exception will be thrown if it doesn't support settings at all. virtual bool hasSetting(const String & setting_name) const; protected: /// still thread-unsafe part. diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 7c65b33d830..4f7452bd79f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1269,7 +1269,6 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c { if (!hasSetting(setting.name)) throw Exception{"Storage '" + getName() + "' doesn't have setting '" + setting.name + "'", ErrorCodes::UNKNOWN_SETTING}; - } /// Check that type conversions are possible. 
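The load/update split documented in the SettingsCommon.h comments above can be summarised with a small standalone sketch. SettingStub and SettingsStub are illustrative names, not the real SettingsCollection API: loadFromChange models the unchecked path taken on table creation or config load, while updateFromChange models the ALTER ... MODIFY SETTING path, which additionally rejects settings declared immutable (the IM(...) entries in the setting macros):

    #include <map>
    #include <stdexcept>
    #include <string>

    /// Illustrative sketch only; the real code lives in SettingsCollection.
    struct SettingStub
    {
        std::string value;
        bool updateable = true;   /// settings declared with IM(...) would carry false here
    };

    struct SettingsStub
    {
        std::map<std::string, SettingStub> settings;

        /// Initial load (CREATE TABLE / config): any known setting may be assigned.
        void loadFromChange(const std::string & name, const std::string & value)
        {
            settings.at(name).value = value;   /// std::map::at throws for unknown names
        }

        /// ALTER ... MODIFY SETTING path: additionally refuses immutable settings.
        void updateFromChange(const std::string & name, const std::string & value)
        {
            auto & setting = settings.at(name);
            if (!setting.updateable)
                throw std::runtime_error("Setting '" + name + "' is immutable and cannot be ALTERed");
            setting.value = value;
        }
    };
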
@@ -1588,7 +1587,8 @@ void MergeTreeData::alterSettings( const SettingsChanges & new_changes, const String & current_database_name, const String & current_table_name, - const Context & context) + const Context & context, + TableStructureWriteLockHolder & /* table_lock_holder */) { settings.updateFromChanges(new_changes); IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index f681baa4488..78c7df3bb48 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -509,13 +509,16 @@ public: bool skip_sanity_checks, AlterDataPartTransactionPtr& transaction); - /// Performs ALTER of table settings + /// Performs ALTER of table settings (MergeTreeSettings). Lightweight operation, affects metadata only. + /// Not atomic, have to be done with alter intention lock. void alterSettings( const SettingsChanges & new_changes, const String & current_database_name, const String & current_table_name, - const Context & context); + const Context & context, + TableStructureWriteLockHolder & table_lock_holder); + /// All MergeTreeData children have settings. bool hasSetting(const String & setting_name) const override; /// Remove columns, that have been markedd as empty after zeroing values with expired ttl diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 9fe1458f5a7..bebc6619509 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -245,15 +245,16 @@ void StorageMergeTree::alter( { if (!params.isMutable()) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); SettingsChanges new_changes; + /// We don't need to lock table structure exclusively to ALTER settings. if (params.isSettingsAlter()) { params.applyForSettingsOnly(new_changes); - alterSettings(new_changes, current_database_name, current_table_name, context); + alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); return; } + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); auto new_columns = getColumns(); auto new_indices = getIndices(); ASTPtr new_order_by_ast = order_by_ast; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 23c49b05c1a..e11ea304c61 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3081,11 +3081,12 @@ void StorageReplicatedMergeTree::alter( if (params.isSettingsAlter()) { + /// We don't replicate settings ALTER. It's local operation. + /// Also we don't upgrade alter lock to table structure lock. 
LOG_DEBUG(log, "ALTER settings only"); - lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId()); SettingsChanges new_changes; params.applyForSettingsOnly(new_changes); - alterSettings(new_changes, current_database_name, current_table_name, query_context); + alterSettings(new_changes, current_database_name, current_table_name, query_context, table_lock_holder); return; } From bca40a860890fb1dc94dc0f0aafd6aa5591b09f4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Aug 2019 19:34:27 +0300 Subject: [PATCH 09/59] Better name --- dbms/src/Storages/StorageMergeTree.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index bebc6619509..8f3897ba128 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -864,9 +864,9 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; ASTPtr ignored_ttl_table_ast; - SettingsChanges ignore_settings_changes; + SettingsChanges ignored_settings_changes; - alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast, ignore_settings_changes); + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast, ignored_settings_changes); auto columns_for_parts = new_columns.getAllPhysical(); for (const auto & part : parts) From 517730900157e84f02773165db9dbb66eea95074 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Aug 2019 16:35:28 +0300 Subject: [PATCH 10/59] Add test for race condition --- .../00980_alter_settings_race.reference | 1 + .../0_stateless/00980_alter_settings_race.sh | 32 +++++++++++++++++++ .../00980_merge_alter_settings.sql | 2 +- 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00980_alter_settings_race.reference create mode 100755 dbms/tests/queries/0_stateless/00980_alter_settings_race.sh diff --git a/dbms/tests/queries/0_stateless/00980_alter_settings_race.reference b/dbms/tests/queries/0_stateless/00980_alter_settings_race.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_alter_settings_race.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00980_alter_settings_race.sh b/dbms/tests/queries/0_stateless/00980_alter_settings_race.sh new file mode 100755 index 00000000000..4a948841ed7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_alter_settings_race.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS table_for_concurrent_alter" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE table_for_concurrent_alter (id UInt64, Data String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity=4096;"; + +n=0 +while [ "$n" -lt 100 ]; +do + n=$(( n + 1 )) + $CLICKHOUSE_CLIENT --query="INSERT INTO table_for_concurrent_alter VALUES(1, 'Hello')" > /dev/null 2> /dev/null & + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE table_for_concurrent_alter FINAL" > /dev/null 2> /dev/null & +done & + + +q=0 +while [ "$q" -lt 100 ]; +do + q=$(( q + 1 )) + counter=$(( 100 + q )) + $CLICKHOUSE_CLIENT --query="ALTER TABLE table_for_concurrent_alter MODIFY SETTING parts_to_throw_insert = $counter, parts_to_delay_insert = $counter, min_merge_bytes_to_use_direct_io = $counter" > /dev/null 2> /dev/null & +done & + +sleep 4 + +# we just test race conditions, not logic +$CLICKHOUSE_CLIENT --query "SELECT 1" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS table_for_concurrent_alter" diff --git a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql index cb1ba315001..866cf960710 100644 --- a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql +++ b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql @@ -35,7 +35,7 @@ INSERT INTO table_for_alter VALUES (2, '2'); -- { serverError 252 } ALTER TABLE table_for_alter MODIFY SETTING xxx_yyy=124; -- { serverError 115 } -ALTER TABLE table_for_alter MODIFY SETTING parts_to_throw_insert = 100, parts_to_delay_insert = 100; +ALTER TABLE table_for_alter MODIFY SETTING parts_to_throw_insert = 100, parts_to_delay_insert = 100; INSERT INTO table_for_alter VALUES (2, '2'); From 13e4581317126661fd0d7244976cf7bda854525d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Aug 2019 18:21:45 +0300 Subject: [PATCH 11/59] More general --- dbms/src/Storages/IStorage.cpp | 51 +++++++++++++++++-- dbms/src/Storages/IStorage.h | 13 +++++ dbms/src/Storages/Kafka/StorageKafka.cpp | 17 +++++++ dbms/src/Storages/Kafka/StorageKafka.h | 10 ++++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 22 ++------ dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +- 6 files changed, 93 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 0fd1994f5ce..e748b72c769 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -1,6 +1,8 @@ #include #include +#include +#include #include #include @@ -19,6 +21,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int TYPE_MISMATCH; extern const int SETTINGS_ARE_NOT_SUPPORTED; + extern const int UNKNOWN_SETTING; } IStorage::IStorage(ColumnsDescription columns_) @@ -295,7 +298,9 @@ bool IStorage::isVirtualColumn(const String & column_name) const bool IStorage::hasSetting(const String & /* setting_name */) const { - throw Exception("Storage '" + getName() + "' doesn't support settings.", ErrorCodes::SETTINGS_ARE_NOT_SUPPORTED); + if (!supportsSettings()) + throw Exception("Storage '" + getName() + "' doesn't support settings.", ErrorCodes::SETTINGS_ARE_NOT_SUPPORTED); + return false; } TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) @@ -352,6 +357,39 @@ TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id) return result; } + +void IStorage::alterSettings( + const SettingsChanges & new_changes, + const String & 
current_database_name, + const String & current_table_name, + const Context & context, + TableStructureWriteLockHolder & /* table_lock_holder */) +{ + IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) + { + if (!new_changes.empty()) + { + auto & storage_changes = ast.as().settings->changes; + /// Make storage settings unique + for (const auto & change : new_changes) + { + if (hasSetting(change.name)) + { + auto finder = [&change] (const SettingChange & c) { return c.name == change.name;}; + if (auto it = std::find_if(storage_changes.begin(), storage_changes.end(), finder); it != storage_changes.end()) + it->value = change.value; + else + storage_changes.push_back(change); + } + else + throw Exception{"Storage '" + getName() + "' doesn't have setting '" + change.name + "'", ErrorCodes::UNKNOWN_SETTING}; + } + } + }; + context.getDatabase(current_database_name)->alterTable(context, current_table_name, getColumns(), getIndices(), storage_modifier); +} + + void IStorage::alter( const AlterCommands & params, const String & database_name, @@ -359,12 +397,17 @@ void IStorage::alter( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - for (const auto & param : params) + if (params.isSettingsAlter()) { - if (param.isMutable()) - throw Exception("Method alter supports only change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + SettingsChanges new_changes; + params.applyForSettingsOnly(new_changes); + alterSettings(new_changes, database_name, table_name, context, table_lock_holder); + return; } + if (params.isMutable()) + throw Exception("Method alter supports only change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); auto new_columns = getColumns(); auto new_indices = getIndices(); diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 79760bae5a8..7000eef76f6 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,9 @@ public: /// Returns true if the storage supports deduplication of inserted data blocks. virtual bool supportsDeduplication() const { return false; } + /// Returns true if the storage supports settings. + virtual bool supportsSettings() const { return false; } + /// Optional size information of each physical column. /// Currently it's only used by the MergeTree family for query optimizations. using ColumnSizeByName = std::unordered_map; @@ -252,6 +256,15 @@ public: throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + /** ALTER table settings if possible. Otherwise throws exception. + */ + virtual void alterSettings( + const SettingsChanges & new_changes, + const String & current_database_name, + const String & current_table_name, + const Context & context, + TableStructureWriteLockHolder & table_lock_holder); + /** Perform any background work. For example, combining parts in a MergeTree type table. * Returns whether any work has been done. 
*/ diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 20599c7e4f8..47f4fc8c610 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -40,6 +40,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNSUPPORTED_METHOD; } namespace @@ -388,6 +389,22 @@ bool StorageKafka::streamToViews() } +bool StorageKafka::hasSetting(const String & setting_name) const +{ + return KafkaSettings::findIndex(setting_name) != KafkaSettings::npos; +} + +void StorageKafka::alterSettings( + const SettingsChanges & /* new_changes */, + const String & /* current_database_name */, + const String & /* current_table_name */, + const Context & /* context */, + TableStructureWriteLockHolder & /* table_lock_holder */) +{ + throw Exception("Storage '" + getName() + "' doesn't support settings alter", ErrorCodes::UNSUPPORTED_METHOD); +} + + void registerStorageKafka(StorageFactory & factory) { factory.registerStorage("Kafka", [](const StorageFactory::Arguments & args) diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index f9b6609def5..38a63e8536e 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -24,6 +24,7 @@ public: std::string getName() const override { return "Kafka"; } std::string getTableName() const override { return table_name; } std::string getDatabaseName() const override { return database_name; } + bool supportsSettings() const override { return true; } void startup() override; void shutdown() override; @@ -50,6 +51,15 @@ public: const auto & getSchemaName() const { return schema_name; } const auto & skipBroken() const { return skip_broken; } + bool hasSetting(const String & setting_name) const override; + + void alterSettings( + const SettingsChanges & new_changes, + const String & current_database_name, + const String & current_table_name, + const Context & context, + TableStructureWriteLockHolder & table_lock_holder) override; + protected: StorageKafka( const std::string & table_name_, diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 4f7452bd79f..b0453b04c8a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1588,26 +1588,10 @@ void MergeTreeData::alterSettings( const String & current_database_name, const String & current_table_name, const Context & context, - TableStructureWriteLockHolder & /* table_lock_holder */) + TableStructureWriteLockHolder & table_lock_holder) { settings.updateFromChanges(new_changes); - IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) - { - if (!new_changes.empty()) - { - auto & storage_changes = ast.as().settings->changes; - /// Make storage settings unique - for (const auto & change : new_changes) - { - auto finder = [&change] (const SettingChange & c) { return c.name == change.name;}; - if (auto it = std::find_if(storage_changes.begin(), storage_changes.end(), finder); it != storage_changes.end()) - it->value = change.value; - else - storage_changes.push_back(change); - } - } - }; - context.getDatabase(current_database_name)->alterTable(context, current_table_name, getColumns(), getIndices(), storage_modifier); + IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); } bool 
MergeTreeData::hasSetting(const String & setting_name) const @@ -2245,7 +2229,7 @@ std::optional MergeTreeData::getMinPartDataVersion() const } -void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event *until) const +void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const { const size_t parts_count_in_total = getPartsCount(); if (parts_count_in_total >= settings.max_parts_in_total) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 78c7df3bb48..756c188c724 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -342,6 +342,8 @@ public: || merging_params.mode == MergingParams::VersionedCollapsing; } + bool supportsSettings() const override { return true; } + bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const override; NameAndTypePair getColumn(const String & column_name) const override @@ -516,7 +518,7 @@ public: const String & current_database_name, const String & current_table_name, const Context & context, - TableStructureWriteLockHolder & table_lock_holder); + TableStructureWriteLockHolder & table_lock_holder) override; /// All MergeTreeData children have settings. bool hasSetting(const String & setting_name) const override; From 7409f1a3de81b5698364d9381ec113195b1cfdb3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Aug 2019 18:33:10 +0300 Subject: [PATCH 12/59] More comments --- dbms/src/Core/Settings.h | 1 + dbms/src/Core/SettingsCommon.h | 3 +++ dbms/src/Storages/Kafka/KafkaSettings.h | 2 ++ dbms/src/Storages/MergeTree/MergeTreeSettings.h | 1 + 4 files changed, 7 insertions(+) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 0c5a4ceed60..54f6a14205b 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -42,6 +42,7 @@ struct Settings : public SettingsCollection * but we are not going to do it, because settings is used everywhere as static struct fields. */ +/// M (mutable) for normal settings, IM (immutable) for not updateable settings. #define LIST_OF_SETTINGS(M, IM) \ M(SettingUInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.") \ M(SettingUInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.") \ diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 4b3f7020a6e..d4607e70904 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -321,6 +321,9 @@ private: size_t offset_of_changed; StringRef name; StringRef description; + /// Can be updated after first load for config/definition. + /// Non updatable settings can be isChanged, + /// if they were overwritten in config/definition. bool updateable; GetStringFunction get_string; GetFieldFunction get_field; diff --git a/dbms/src/Storages/Kafka/KafkaSettings.h b/dbms/src/Storages/Kafka/KafkaSettings.h index fd8d9b3dc08..e43ea7cd70e 100644 --- a/dbms/src/Storages/Kafka/KafkaSettings.h +++ b/dbms/src/Storages/Kafka/KafkaSettings.h @@ -14,6 +14,8 @@ class ASTStorage; struct KafkaSettings : public SettingsCollection { + +/// M (mutable) for normal settings, IM (immutable) for not updateable settings. 
#define LIST_OF_KAFKA_SETTINGS(M, IM) \ M(SettingString, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.") \ M(SettingString, kafka_topic_list, "", "A list of Kafka topics.") \ diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 3012b9e1694..dc94ad5b11f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -24,6 +24,7 @@ class ASTStorage; struct MergeTreeSettings : public SettingsCollection { +/// M (mutable) for normal settings, IM (immutable) for not updateable settings. #define LIST_OF_MERGE_TREE_SETTINGS(M, IM) \ IM(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ \ From 75c3ed967a9fe5fb780b0107b9887bf343d67a15 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Aug 2019 19:13:28 +0300 Subject: [PATCH 13/59] Checking updatable for user settings --- dbms/src/Interpreters/Context.cpp | 11 +++++++++++ dbms/src/Interpreters/Context.h | 3 +++ dbms/src/Interpreters/InterpreterSetQuery.cpp | 4 ++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 992593d852c..2484342908f 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1126,6 +1126,17 @@ void Context::applySettingsChanges(const SettingsChanges & changes) applySettingChange(change); } +void Context::updateSettingsChanges(const SettingsChanges & changes) +{ + auto lock = getLock(); + for (const SettingChange & change : changes) + { + if (change.name == "profile") + setProfile(change.value.safeGet()); + else + settings.updateFromChange(change); + } +} void Context::checkSettingsConstraints(const SettingChange & change) { diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 50b7ab3eba2..4f97922a513 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -281,6 +281,9 @@ public: void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes); + /// Update checking that each setting is updatable + void updateSettingsChanges(const SettingsChanges & changes); + /// Checks the constraints. 
void checkSettingsConstraints(const SettingChange & change); void checkSettingsConstraints(const SettingsChanges & changes); diff --git a/dbms/src/Interpreters/InterpreterSetQuery.cpp b/dbms/src/Interpreters/InterpreterSetQuery.cpp index f92e9638822..ae982611e60 100644 --- a/dbms/src/Interpreters/InterpreterSetQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSetQuery.cpp @@ -10,7 +10,7 @@ BlockIO InterpreterSetQuery::execute() { const auto & ast = query_ptr->as(); context.checkSettingsConstraints(ast.changes); - context.getSessionContext().applySettingsChanges(ast.changes); + context.getSessionContext().updateSettingsChanges(ast.changes); return {}; } @@ -19,7 +19,7 @@ void InterpreterSetQuery::executeForCurrentContext() { const auto & ast = query_ptr->as(); context.checkSettingsConstraints(ast.changes); - context.applySettingsChanges(ast.changes); + context.updateSettingsChanges(ast.changes); } } From a03fcd9f121ac5ad655a4e714175266213d6666f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 8 Aug 2019 22:29:56 +0300 Subject: [PATCH 14/59] Make settings values atomic to avoid race conditions --- dbms/programs/copier/ClusterCopier.cpp | 2 +- dbms/src/Core/SettingsCommon.cpp | 26 +++++--- dbms/src/Core/SettingsCommon.h | 63 +++++++++++++++---- .../LogicalExpressionsOptimizer.cpp | 4 +- .../LogicalExpressionsOptimizer.h | 2 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- .../Storages/MergeTree/MergeTreeSettings.cpp | 2 +- 8 files changed, 75 insertions(+), 30 deletions(-) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 43158dedd71..0c383ace576 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -708,7 +708,7 @@ void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & c auto set_default_value = [] (auto && setting, auto && default_value) { - setting = setting.changed ? setting.value : default_value; + setting = setting.changed ? setting.getValue() : default_value; }; /// Override important settings diff --git a/dbms/src/Core/SettingsCommon.cpp b/dbms/src/Core/SettingsCommon.cpp index 988a4b2d736..639e1dea3ee 100644 --- a/dbms/src/Core/SettingsCommon.cpp +++ b/dbms/src/Core/SettingsCommon.cpp @@ -34,19 +34,19 @@ namespace ErrorCodes template String SettingNumber::toString() const { - return DB::toString(value); + return DB::toString(value.load(std::memory_order_relaxed)); } template Field SettingNumber::toField() const { - return value; + return value.load(std::memory_order_relaxed); } template void SettingNumber::set(Type x) { - value = x; + value.store(x, std::memory_order_relaxed); changed = true; } @@ -136,17 +136,17 @@ template struct SettingNumber; String SettingMaxThreads::toString() const { /// Instead of the `auto` value, we output the actual value to make it easier to see. - return is_auto ? ("auto(" + DB::toString(value) + ")") : DB::toString(value); + return is_auto ? ("auto(" + DB::toString(getValue()) + ")") : DB::toString(getValue()); } Field SettingMaxThreads::toField() const { - return is_auto ? 0 : value; + return is_auto ? 0 : getValue(); } void SettingMaxThreads::set(UInt64 x) { - value = x ? x : getAutoValue(); + value.store(x ? x : getAutoValue(), std::memory_order_relaxed); is_auto = x == 0; changed = true; } @@ -169,7 +169,7 @@ void SettingMaxThreads::set(const String & x) void SettingMaxThreads::serialize(WriteBuffer & buf) const { - writeVarUInt(is_auto ? 
0 : value, buf); + writeVarUInt(is_auto ? 0 : getValue(), buf); } void SettingMaxThreads::deserialize(ReadBuffer & buf) @@ -195,18 +195,21 @@ UInt64 SettingMaxThreads::getAutoValue() const template String SettingTimespan::toString() const { + std::lock_guard lock(m); return DB::toString(value.totalMicroseconds() / microseconds_per_io_unit); } template Field SettingTimespan::toField() const { + std::lock_guard lock(m); return value.totalMicroseconds() / microseconds_per_io_unit; } template void SettingTimespan::set(const Poco::Timespan & x) { + std::lock_guard lock(m); value = x; changed = true; } @@ -235,6 +238,7 @@ void SettingTimespan::set(const String & x) template void SettingTimespan::serialize(WriteBuffer & buf) const { + std::lock_guard lock(m); writeVarUInt(value.totalMicroseconds() / microseconds_per_io_unit, buf); } @@ -252,16 +256,19 @@ template struct SettingTimespan; String SettingString::toString() const { + std::lock_guard lock(m); return value; } Field SettingString::toField() const { + std::lock_guard lock(m); return value; } void SettingString::set(const String & x) { + std::lock_guard lock(m); value = x; changed = true; } @@ -273,6 +280,7 @@ void SettingString::set(const Field & x) void SettingString::serialize(WriteBuffer & buf) const { + std::lock_guard lock(m); writeBinary(value, buf); } @@ -296,7 +304,7 @@ Field SettingChar::toField() const void SettingChar::set(char x) { - value = x; + value.store(x, std::memory_order_relaxed); changed = true; } @@ -351,7 +359,7 @@ void SettingEnum::deserialize(ReadBuffer & buf) { \ using EnumType = ENUM_NAME; \ using UnderlyingType = std::underlying_type::type; \ - switch (static_cast(value)) \ + switch (static_cast(getValue())) \ { \ LIST_OF_NAMES_MACRO(IMPLEMENT_SETTING_ENUM_TO_STRING_HELPER_) \ } \ diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index d4607e70904..46bf5bca5ce 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -32,13 +33,23 @@ namespace ErrorCodes template struct SettingNumber { - Type value; +public: + /// The value is atomic, because we want to avoid race conditions on value precisely. + /// It doesn't gurantee atomicy on whole structure. It just helps to avoid the most common + /// case: when we change setting from one thread and use in another (for example in MergeTreeSettings). + /// + std::atomic value; + bool changed = false; SettingNumber(Type x = 0) : value(x) {} + SettingNumber(const SettingNumber & o) : value(o.getValue()), changed(o.changed) { } + + operator Type() const { return getValue(); } + Type getValue() const { return value.load(std::memory_order_relaxed); } - operator Type() const { return value; } SettingNumber & operator= (Type x) { set(x); return *this; } + SettingNumber & operator= (SettingNumber o) { set(o.getValue()); return *this; } /// Serialize to a test string. String toString() const; @@ -73,14 +84,23 @@ using SettingBool = SettingNumber; */ struct SettingMaxThreads { - UInt64 value; + std::atomic value; bool is_auto; bool changed = false; SettingMaxThreads(UInt64 x = 0) : value(x ? 
x : getAutoValue()), is_auto(x == 0) {} + SettingMaxThreads(const SettingMaxThreads & o) + : is_auto(o.is_auto) + , changed(o.changed) + { + value.store(o.value, std::memory_order_relaxed); + } + + operator UInt64() const { return getValue(); } + UInt64 getValue() const { return value.load(std::memory_order_relaxed); } - operator UInt64() const { return value; } SettingMaxThreads & operator= (UInt64 x) { set(x); return *this; } + SettingMaxThreads & operator= (const SettingMaxThreads & o) { set(o.getValue()); return *this; } String toString() const; Field toField() const; @@ -102,16 +122,20 @@ enum class SettingTimespanIO { MILLISECOND, SECOND }; template struct SettingTimespan { + mutable std::mutex m; Poco::Timespan value; bool changed = false; SettingTimespan(UInt64 x = 0) : value(x * microseconds_per_io_unit) {} + SettingTimespan(const SettingTimespan & o) : value(o.value), changed(o.changed) {} - operator Poco::Timespan() const { return value; } + operator Poco::Timespan() const { return getValue(); } + Poco::Timespan getValue() const { std::lock_guard guard(m); return value; } SettingTimespan & operator= (const Poco::Timespan & x) { set(x); return *this; } + SettingTimespan & operator= (const SettingTimespan & o) { set(o.value); return *this; } - Poco::Timespan::TimeDiff totalSeconds() const { return value.totalSeconds(); } - Poco::Timespan::TimeDiff totalMilliseconds() const { return value.totalMilliseconds(); } + Poco::Timespan::TimeDiff totalSeconds() const { return getValue().totalSeconds(); } + Poco::Timespan::TimeDiff totalMilliseconds() const { return getValue().totalMilliseconds(); } String toString() const; Field toField() const; @@ -134,13 +158,18 @@ using SettingMilliseconds = SettingTimespan; struct SettingString { + mutable std::mutex m; String value; bool changed = false; SettingString(const String & x = String{}) : value(x) {} + SettingString(const SettingString & o) : value(o.value), changed(o.changed) {} + + operator String() const { return getValue(); } + String getValue() const { std::lock_guard guard(m); return value; } - operator String() const { return value; } SettingString & operator= (const String & x) { set(x); return *this; } + SettingString & operator= (const SettingString & o) { set(o.value); return *this; } String toString() const; Field toField() const; @@ -156,13 +185,15 @@ struct SettingString struct SettingChar { public: - char value; + std::atomic value; bool changed = false; SettingChar(char x = '\0') : value(x) {} + SettingChar(const SettingChar & o) : value(o.getValue()), changed(o.changed) { } - operator char() const { return value; } + operator char() const { return getValue(); } SettingChar & operator= (char x) { set(x); return *this; } + SettingChar & operator= (const SettingChar & o) { set(o.getValue()); return *this; } String toString() const; Field toField() const; @@ -173,6 +204,8 @@ public: void serialize(WriteBuffer & buf) const; void deserialize(ReadBuffer & buf); + + char getValue() const { return value.load(std::memory_order_relaxed); } }; @@ -180,23 +213,27 @@ public: template struct SettingEnum { - EnumType value; + std::atomic value; bool changed = false; SettingEnum(EnumType x) : value(x) {} + SettingEnum(const SettingEnum & o) : value(o.getValue()), changed(o.changed) { } - operator EnumType() const { return value; } + operator EnumType() const { return getValue(); } SettingEnum & operator= (EnumType x) { set(x); return *this; } + SettingEnum & operator= (SettingEnum o) { set(o.getValue()); return *this; } String toString() 
const; Field toField() const { return toString(); } - void set(EnumType x) { value = x; changed = true; } + void set(EnumType x) { value.store(x, std::memory_order_relaxed); changed = true; } void set(const Field & x) { set(safeGet(x)); } void set(const String & x); void serialize(WriteBuffer & buf) const; void deserialize(ReadBuffer & buf); + + EnumType getValue() const { return value.load(std::memory_order_relaxed); } }; diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp index 1e00dd499ba..ca8fbb1c8f4 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -30,8 +30,8 @@ bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpres return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression); } -LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, ExtractedSettings && settings_) - : select_query(select_query_), settings(settings_) +LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length) + : select_query(select_query_), settings(optimize_min_equality_disjunction_chain_length) { } diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.h b/dbms/src/Interpreters/LogicalExpressionsOptimizer.h index 09c3931ce1d..fa5289b3f5f 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.h @@ -36,7 +36,7 @@ class LogicalExpressionsOptimizer final public: /// Constructor. Accepts the root of the query DAG. - LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, ExtractedSettings && settings_); + LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length); /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN * on the expressions `expr` IN (x1, ..., xN). diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 48f40d17cb3..464ef149e35 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -596,7 +596,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( InJoinSubqueriesPreprocessor(context).visit(query); /// Optimizes logical expressions. 
- LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length).perform(); } /// Creates a dictionary `aliases`: alias -> ASTPtr diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ccbcd04857d..513a713651b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -400,8 +400,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( if (relative_sample_size == RelativeSize(0)) relative_sample_size = 1; - relative_sample_size /= settings.parallel_replicas_count.value; - relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.value); + relative_sample_size /= settings.parallel_replicas_count.getValue(); + relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.getValue()); } if (relative_sample_offset >= RelativeSize(1)) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index 9eee33554ab..425a0851d67 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -69,7 +69,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) if (std::find_if(changes.begin(), changes.end(), \ [](const SettingChange & c) { return c.name == #NAME; }) \ == changes.end()) \ - changes.push_back(SettingChange{#NAME, NAME.value}); + changes.push_back(SettingChange{#NAME, NAME.getValue()}); APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(ADD_IF_ABSENT) #undef ADD_IF_ABSENT From 50cabe4ab0b5cfc492a88bed1ddfd8ebef1dd723 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 8 Aug 2019 22:30:32 +0300 Subject: [PATCH 15/59] Comment --- dbms/src/Core/SettingsCommon.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 46bf5bca5ce..0939ccac7ca 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -37,7 +37,6 @@ public: /// The value is atomic, because we want to avoid race conditions on value precisely. /// It doesn't gurantee atomicy on whole structure. It just helps to avoid the most common /// case: when we change setting from one thread and use in another (for example in MergeTreeSettings). 
- /// std::atomic value; bool changed = false; From 2803fcc2bab351348672b20c95c380fa5703cd9b Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 9 Aug 2019 16:02:01 +0300 Subject: [PATCH 16/59] Make atomic settings --- CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 2 +- dbms/programs/copier/ClusterCopier.cpp | 2 +- dbms/programs/server/TCPHandler.cpp | 6 +- dbms/src/Core/SettingsCommon.cpp | 135 ++++++++++++---- dbms/src/Core/SettingsCommon.h | 151 +++++++++++------- .../ClusterProxy/executeQuery.cpp | 6 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Interpreters/ThreadStatusExt.cpp | 2 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 18 +-- .../ReplicatedMergeTreeCleanupThread.cpp | 8 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageJoin.cpp | 4 +- 13 files changed, 230 insertions(+), 110 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e03af27cfa..c0fbdd51f0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -350,7 +350,7 @@ if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_L endif () # Add Libc. GLIBC is actually a collection of interdependent libraries. - set (DEFAULT_LIBS "${DEFAULT_LIBS} -lrt -ldl -lpthread -lm -lc") + set (DEFAULT_LIBS "${DEFAULT_LIBS} -lrt -ldl -lpthread -lm -lc -latomic") # Note: we'd rather use Musl libc library, but it's little bit more difficult to use. diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 091a1ac063f..509e5393c34 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -323,7 +323,7 @@ private: insert_format = "Values"; /// Setting value from cmd arg overrides one from config - if (context.getSettingsRef().max_insert_block_size.changed) + if (context.getSettingsRef().max_insert_block_size.isChanged()) insert_format_max_block_size = context.getSettingsRef().max_insert_block_size; else insert_format_max_block_size = config().getInt("insert_format_max_block_size", context.getSettingsRef().max_insert_block_size); diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 0c383ace576..a63cd54a003 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -708,7 +708,7 @@ void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & c auto set_default_value = [] (auto && setting, auto && default_value) { - setting = setting.changed ? setting.getValue() : default_value; + setting = setting.isChanged() ? setting.getValue() : default_value; }; /// Override important settings diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 5091258acaf..c20a466e5b5 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -182,11 +182,11 @@ void TCPHandler::runImpl() /// Should we send internal logs to client? 
const auto client_logs_level = query_context->getSettingsRef().send_logs_level; if (client_revision >= DBMS_MIN_REVISION_WITH_SERVER_LOGS - && client_logs_level.value != LogsLevel::none) + && client_logs_level != LogsLevel::none) { state.logs_queue = std::make_shared(); state.logs_queue->max_priority = Poco::Logger::parseLevel(client_logs_level.toString()); - CurrentThread::attachInternalTextLogsQueue(state.logs_queue, client_logs_level.value); + CurrentThread::attachInternalTextLogsQueue(state.logs_queue, client_logs_level); } query_context->setExternalTablesInitializer([&connection_settings, this] (Context & context) @@ -329,7 +329,7 @@ void TCPHandler::readData(const Settings & connection_settings) const auto receive_timeout = query_context->getSettingsRef().receive_timeout.value; /// Poll interval should not be greater than receive_timeout - const size_t default_poll_interval = connection_settings.poll_interval.value * 1000000; + const size_t default_poll_interval = connection_settings.poll_interval * 1000000; size_t current_poll_interval = static_cast(receive_timeout.totalMicroseconds()); constexpr size_t min_poll_interval = 5000; // 5 ms size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval)); diff --git a/dbms/src/Core/SettingsCommon.cpp b/dbms/src/Core/SettingsCommon.cpp index 639e1dea3ee..2d3ed4f6e14 100644 --- a/dbms/src/Core/SettingsCommon.cpp +++ b/dbms/src/Core/SettingsCommon.cpp @@ -34,20 +34,19 @@ namespace ErrorCodes template String SettingNumber::toString() const { - return DB::toString(value.load(std::memory_order_relaxed)); + return DB::toString(getValue()); } template Field SettingNumber::toField() const { - return value.load(std::memory_order_relaxed); + return getValue(); } template void SettingNumber::set(Type x) { - value.store(x, std::memory_order_relaxed); - changed = true; + data.store(Data{x, true}, std::memory_order_relaxed); } template @@ -59,6 +58,14 @@ void SettingNumber::set(const Field & x) set(applyVisitor(FieldVisitorConvertToNumber(), x)); } + +template +SettingNumber & SettingNumber::operator= (const SettingNumber & o) +{ + data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; +} + template void SettingNumber::set(const String & x) { @@ -93,9 +100,9 @@ template void SettingNumber::serialize(WriteBuffer & buf) const { if constexpr (std::is_integral_v && std::is_unsigned_v) - writeVarUInt(static_cast(value), buf); + writeVarUInt(static_cast(getValue()), buf); else if constexpr (std::is_integral_v && std::is_signed_v) - writeVarInt(static_cast(value), buf); + writeVarInt(static_cast(getValue()), buf); else { static_assert(std::is_floating_point_v); @@ -133,22 +140,28 @@ template struct SettingNumber; template struct SettingNumber; +SettingMaxThreads & SettingMaxThreads::operator= (const SettingMaxThreads & o) +{ + data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; +} + String SettingMaxThreads::toString() const { + auto d = data.load(std::memory_order_relaxed); /// Instead of the `auto` value, we output the actual value to make it easier to see. - return is_auto ? ("auto(" + DB::toString(getValue()) + ")") : DB::toString(getValue()); + return d.is_auto ? ("auto(" + DB::toString(d.value) + ")") : DB::toString(d.value); } Field SettingMaxThreads::toField() const { - return is_auto ? 0 : getValue(); + auto d = data.load(std::memory_order_relaxed); + return d.is_auto ? 
0 : d.value; } void SettingMaxThreads::set(UInt64 x) { - value.store(x ? x : getAutoValue(), std::memory_order_relaxed); - is_auto = x == 0; - changed = true; + data.store({x ? x : getAutoValue(), x == 0, true}); } void SettingMaxThreads::set(const Field & x) @@ -169,7 +182,8 @@ void SettingMaxThreads::set(const String & x) void SettingMaxThreads::serialize(WriteBuffer & buf) const { - writeVarUInt(is_auto ? 0 : getValue(), buf); + auto d = data.load(std::memory_order_relaxed); + writeVarUInt(d.is_auto ? 0 : d.value, buf); } void SettingMaxThreads::deserialize(ReadBuffer & buf) @@ -181,8 +195,7 @@ void SettingMaxThreads::deserialize(ReadBuffer & buf) void SettingMaxThreads::setAuto() { - value = getAutoValue(); - is_auto = true; + data.store({getAutoValue(), true, isChanged()}); } UInt64 SettingMaxThreads::getAutoValue() const @@ -191,25 +204,54 @@ UInt64 SettingMaxThreads::getAutoValue() const return res; } +void SettingMaxThreads::setChanged(bool changed) +{ + auto d = data.load(std::memory_order_relaxed); + data.store({d.value, d.is_auto, changed}); +} + + +template +SettingTimespan & SettingTimespan::operator= (const SettingTimespan & o) +{ + std::shared_lock lock_o(o.mutex); + value = o.value; + changed = o.changed; + return *this; +} + +template +SettingTimespan::SettingTimespan(const SettingTimespan & o) +{ + std::shared_lock lock_o(o.mutex); + value = o.value; + changed = o.changed; +} + + +template +void SettingTimespan::setChanged(bool c) +{ + std::unique_lock lock(mutex); + changed = c; +} template String SettingTimespan::toString() const { - std::lock_guard lock(m); - return DB::toString(value.totalMicroseconds() / microseconds_per_io_unit); + return DB::toString(getValue().totalMicroseconds() / microseconds_per_io_unit); } template Field SettingTimespan::toField() const { - std::lock_guard lock(m); - return value.totalMicroseconds() / microseconds_per_io_unit; + return getValue().totalMicroseconds() / microseconds_per_io_unit; } template void SettingTimespan::set(const Poco::Timespan & x) { - std::lock_guard lock(m); + std::unique_lock lock(mutex); value = x; changed = true; } @@ -238,8 +280,7 @@ void SettingTimespan::set(const String & x) template void SettingTimespan::serialize(WriteBuffer & buf) const { - std::lock_guard lock(m); - writeVarUInt(value.totalMicroseconds() / microseconds_per_io_unit, buf); + writeVarUInt(getValue().totalMicroseconds() / microseconds_per_io_unit, buf); } template @@ -253,26 +294,47 @@ void SettingTimespan::deserialize(ReadBuffer & buf) template struct SettingTimespan; template struct SettingTimespan; +SettingString & SettingString::operator= (const SettingString & o) +{ + std::shared_lock lock_o(o.mutex); + value = o.value; + changed = o.changed; + return *this; +} + +SettingString::SettingString(const SettingString & o) +{ + std::shared_lock lock(o.mutex); + value = o.value; + changed = o.changed; +} + String SettingString::toString() const { - std::lock_guard lock(m); + std::shared_lock lock(mutex); return value; } Field SettingString::toField() const { - std::lock_guard lock(m); + std::shared_lock lock(mutex); return value; } void SettingString::set(const String & x) { - std::lock_guard lock(m); + std::unique_lock lock(mutex); value = x; changed = true; } +void SettingString::setChanged(bool c) +{ + std::unique_lock lock(mutex); + changed = c; +} + void SettingString::set(const Field & x) { set(safeGet(x)); @@ -280,7 +342,6 @@ void SettingString::set(const Field & x) void SettingString::serialize(WriteBuffer & buf) const { - std::lock_guard 
lock(m); writeBinary(value, buf); } @@ -291,10 +352,15 @@ void SettingString::deserialize(ReadBuffer & buf) set(s); } +SettingChar & SettingChar::operator= (const SettingChar & o) +{ + data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; +} String SettingChar::toString() const { - return String(1, value); + return String(1, getValue()); } Field SettingChar::toField() const @@ -304,8 +370,7 @@ Field SettingChar::toField() const void SettingChar::set(char x) { - value.store(x, std::memory_order_relaxed); - changed = true; + data.store({x, true}); } void SettingChar::set(const String & x) @@ -335,6 +400,19 @@ void SettingChar::deserialize(ReadBuffer & buf) } +template +SettingEnum & SettingEnum::operator= (const SettingEnum & o) +{ + data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; +} + +template +void SettingEnum::set(EnumType x) +{ + data.store({x, true}, std::memory_order_relaxed); +} + template void SettingEnum::serialize(WriteBuffer & buf) const { @@ -350,6 +428,7 @@ void SettingEnum::deserialize(ReadBuffer & buf) } + #define IMPLEMENT_SETTING_ENUM(ENUM_NAME, LIST_OF_NAMES_MACRO, ERROR_CODE_FOR_UNEXPECTED_NAME) \ IMPLEMENT_SETTING_ENUM_WITH_TAG(ENUM_NAME, void, LIST_OF_NAMES_MACRO, ERROR_CODE_FOR_UNEXPECTED_NAME) diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 0939ccac7ca..83d90c28f53 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -33,22 +34,25 @@ namespace ErrorCodes template struct SettingNumber { -public: - /// The value is atomic, because we want to avoid race conditions on value precisely. - /// It doesn't gurantee atomicy on whole structure. It just helps to avoid the most common - /// case: when we change setting from one thread and use in another (for example in MergeTreeSettings). - std::atomic value; + struct Data + { + Type value; + bool changed; + }; - bool changed = false; + std::atomic data; - SettingNumber(Type x = 0) : value(x) {} - SettingNumber(const SettingNumber & o) : value(o.getValue()), changed(o.changed) { } + SettingNumber(Type x = 0) : data{{x, false}} {} + SettingNumber(const SettingNumber & o) : data{o.data.load(std::memory_order_relaxed)} {} + + bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } + void setChanged(bool changed) { data.store({getValue(), changed}, std::memory_order_relaxed); } operator Type() const { return getValue(); } - Type getValue() const { return value.load(std::memory_order_relaxed); } + Type getValue() const { return data.load(std::memory_order_relaxed).value; } SettingNumber & operator= (Type x) { set(x); return *this; } - SettingNumber & operator= (SettingNumber o) { set(o.getValue()); return *this; } + SettingNumber & operator= (const SettingNumber & o); /// Serialize to a test string. String toString() const; @@ -83,23 +87,26 @@ using SettingBool = SettingNumber; */ struct SettingMaxThreads { - std::atomic value; - bool is_auto; - bool changed = false; - - SettingMaxThreads(UInt64 x = 0) : value(x ? x : getAutoValue()), is_auto(x == 0) {} - SettingMaxThreads(const SettingMaxThreads & o) - : is_auto(o.is_auto) - , changed(o.changed) + struct Data { - value.store(o.value, std::memory_order_relaxed); - } + UInt64 value; + bool is_auto; + bool changed; + }; + + std::atomic data; + + SettingMaxThreads(UInt64 x = 0) : data{{x ? 
x : getAutoValue(), x == 0, false}} {} + SettingMaxThreads(const SettingMaxThreads & o) : data{o.data.load(std::memory_order_relaxed)} {} + + bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } + void setChanged(bool changed); operator UInt64() const { return getValue(); } - UInt64 getValue() const { return value.load(std::memory_order_relaxed); } + UInt64 getValue() const { return data.load(std::memory_order_relaxed).value; } SettingMaxThreads & operator= (UInt64 x) { set(x); return *this; } - SettingMaxThreads & operator= (const SettingMaxThreads & o) { set(o.getValue()); return *this; } + SettingMaxThreads & operator= (const SettingMaxThreads & o); String toString() const; Field toField() const; @@ -111,6 +118,7 @@ struct SettingMaxThreads void serialize(WriteBuffer & buf) const; void deserialize(ReadBuffer & buf); + bool isAuto() const { return data.load(std::memory_order_relaxed).is_auto; } void setAuto(); UInt64 getAutoValue() const; }; @@ -121,20 +129,37 @@ enum class SettingTimespanIO { MILLISECOND, SECOND }; template struct SettingTimespan { - mutable std::mutex m; + mutable std::shared_mutex mutex; Poco::Timespan value; bool changed = false; SettingTimespan(UInt64 x = 0) : value(x * microseconds_per_io_unit) {} - SettingTimespan(const SettingTimespan & o) : value(o.value), changed(o.changed) {} + SettingTimespan(const SettingTimespan & o); operator Poco::Timespan() const { return getValue(); } - Poco::Timespan getValue() const { std::lock_guard guard(m); return value; } + Poco::Timespan getValue() const { std::shared_lock lock(mutex); return value; } SettingTimespan & operator= (const Poco::Timespan & x) { set(x); return *this; } - SettingTimespan & operator= (const SettingTimespan & o) { set(o.value); return *this; } + SettingTimespan & operator= (const SettingTimespan & o); - Poco::Timespan::TimeDiff totalSeconds() const { return getValue().totalSeconds(); } - Poco::Timespan::TimeDiff totalMilliseconds() const { return getValue().totalMilliseconds(); } + Poco::Timespan::TimeDiff totalSeconds() const + { + std::shared_lock lock(mutex); + return value.totalSeconds(); + } + + Poco::Timespan::TimeDiff totalMilliseconds() const + { + std::shared_lock lock(mutex); + return value.totalMilliseconds(); + } + + bool isChanged() const + { + std::shared_lock lock(mutex); + return changed; + } + + void setChanged(bool changed); String toString() const; Field toField() const; @@ -157,18 +182,19 @@ using SettingMilliseconds = SettingTimespan; struct SettingString { - mutable std::mutex m; + mutable std::shared_mutex mutex; String value; bool changed = false; SettingString(const String & x = String{}) : value(x) {} - SettingString(const SettingString & o) : value(o.value), changed(o.changed) {} - - operator String() const { return getValue(); } - String getValue() const { std::lock_guard guard(m); return value; } + SettingString(const SettingString & o); + operator String() const { return getValue(); } + String getValue() const { std::shared_lock lock(mutex); return value; } SettingString & operator= (const String & x) { set(x); return *this; } - SettingString & operator= (const SettingString & o) { set(o.value); return *this; } + SettingString & operator= (const SettingString & o); + bool isChanged() const { std::shared_lock lock(mutex); return changed; } + void setChanged(bool changed); String toString() const; Field toField() const; @@ -184,15 +210,25 @@ struct SettingString struct SettingChar { public: - std::atomic value; - bool changed = false; + struct Data + 
{ + char value; + bool changed; + }; - SettingChar(char x = '\0') : value(x) {} - SettingChar(const SettingChar & o) : value(o.getValue()), changed(o.changed) { } + std::atomic data; + + SettingChar(char x = '\0') : data({x, false}) {} + SettingChar(const SettingChar & o) : data{o.data.load(std::memory_order_relaxed)} {} operator char() const { return getValue(); } + char getValue() const { return data.load(std::memory_order_relaxed).value; } + SettingChar & operator= (char x) { set(x); return *this; } - SettingChar & operator= (const SettingChar & o) { set(o.getValue()); return *this; } + SettingChar & operator= (const SettingChar & o); + + bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } + void setChanged(bool changed) { data.store({getValue(), changed}, std::memory_order_relaxed);} String toString() const; Field toField() const; @@ -203,8 +239,6 @@ public: void serialize(WriteBuffer & buf) const; void deserialize(ReadBuffer & buf); - - char getValue() const { return value.load(std::memory_order_relaxed); } }; @@ -212,27 +246,35 @@ public: template struct SettingEnum { - std::atomic value; - bool changed = false; + struct Data + { + EnumType value; + bool changed; + }; - SettingEnum(EnumType x) : value(x) {} - SettingEnum(const SettingEnum & o) : value(o.getValue()), changed(o.changed) { } + std::atomic data; + + SettingEnum(EnumType x) : data({x, false}) {} + SettingEnum(const SettingEnum & o) : data{o.data.load(std::memory_order_relaxed)} {} operator EnumType() const { return getValue(); } + EnumType getValue() const { return data.load(std::memory_order_relaxed).value; } + SettingEnum & operator= (EnumType x) { set(x); return *this; } - SettingEnum & operator= (SettingEnum o) { set(o.getValue()); return *this; } + SettingEnum & operator= (const SettingEnum & o); + + bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } + void setChanged(bool changed) { data.store({getValue(), changed}, std::memory_order_relaxed);} String toString() const; Field toField() const { return toString(); } - void set(EnumType x) { value.store(x, std::memory_order_relaxed); changed = true; } + void set(EnumType x); void set(const Field & x) { set(safeGet(x)); } void set(const String & x); void serialize(WriteBuffer & buf) const; void deserialize(ReadBuffer & buf); - - EnumType getValue() const { return value.load(std::memory_order_relaxed); } }; @@ -344,6 +386,7 @@ private: Derived & castToDerived() { return *static_cast(this); } const Derived & castToDerived() const { return *static_cast(this); } + using IsChangedFunction = bool (*)(const Derived &); using GetStringFunction = String (*)(const Derived &); using GetFieldFunction = Field (*)(const Derived &); using SetStringFunction = void (*)(Derived &, const String &); @@ -354,7 +397,7 @@ private: struct MemberInfo { - size_t offset_of_changed; + IsChangedFunction is_changed; StringRef name; StringRef description; /// Can be updated after first load for config/definition. 
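
The hunks above apply one pattern to SettingNumber, SettingChar and SettingEnum: the value and the `changed` flag are packed into a single trivially copyable Data struct held in one std::atomic, so a concurrent reader can never observe a torn value/flag pair. For structs wider than the native atomic width the compiler may route the load and store through libatomic, which is why `-latomic` is appended to DEFAULT_LIBS in the CMake hunk. What follows is a minimal standalone sketch of that idea, not the real ClickHouse classes; the names AtomicSetting, Data and get are invented for the illustration.

#include <atomic>

// Illustrative only: pack value + changed into one atomic unit.
template <typename T>
struct AtomicSetting
{
    struct Data
    {
        T value;
        bool changed;
    };

    std::atomic<Data> data;

    explicit AtomicSetting(T initial = T{}) : data(Data{initial, false}) {}

    T get() const { return data.load(std::memory_order_relaxed).value; }
    bool isChanged() const { return data.load(std::memory_order_relaxed).changed; }

    // The whole pair is stored at once, so readers never see a new value
    // paired with a stale flag (or vice versa).
    void set(T x) { data.store(Data{x, true}, std::memory_order_relaxed); }

    // Note: like the patch, this is a load followed by a store,
    // not a single read-modify-write.
    void setChanged(bool changed) { data.store(Data{get(), changed}, std::memory_order_relaxed); }
};

SettingString cannot take this route because std::string is not trivially copyable and so cannot live inside std::atomic; it keeps a plain value guarded by a std::shared_mutex instead, and SettingTimespan follows the same mutex-based approach. A side effect of both changes is that `changed` is no longer a plain bool data member, which is why the MemberInfo code just below switches from offsetof(Derived, NAME.changed) to a stored function pointer, [](const Derived & d) { return d.NAME.isChanged(); }.
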
@@ -369,7 +412,7 @@ private: DeserializeFunction deserialize; CastValueWithoutApplyingFunction cast_value_without_applying; - bool isChanged(const Derived & collection) const { return *reinterpret_cast(reinterpret_cast(&collection) + offset_of_changed); } + bool isChanged(const Derived & collection) const { return is_changed(collection); } }; class MemberInfos @@ -729,8 +772,7 @@ public: #define IMPLEMENT_SETTINGS_COLLECTION_ADD_MUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ - static_assert(std::is_same_v().NAME.changed), bool>); \ - add({offsetof(Derived, NAME.changed), \ + add({[](const Derived & d) { return d.NAME.isChanged(); }, \ StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), true, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ @@ -738,8 +780,7 @@ public: &Functions::NAME##_castValueWithoutApplying }); #define IMPLEMENT_SETTINGS_COLLECTION_ADD_IMMUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ - static_assert(std::is_same_v().NAME.changed), bool>); \ - add({offsetof(Derived, NAME.changed), \ + add({[](const Derived & d) { return d.NAME.isChanged(); }, \ StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), false, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp index 1a202e064f1..e4e52eab2b7 100644 --- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -26,9 +26,9 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.max_memory_usage_for_all_queries = 0; /// Set as unchanged to avoid sending to remote server. - new_settings.max_concurrent_queries_for_user.changed = false; - new_settings.max_memory_usage_for_user.changed = false; - new_settings.max_memory_usage_for_all_queries.changed = false; + new_settings.max_concurrent_queries_for_user.setChanged(false); + new_settings.max_memory_usage_for_user.setChanged(false); + new_settings.max_memory_usage_for_all_queries.setChanged(false); Context new_context(context); new_context.setSettings(new_settings); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 464ef149e35..465e0fb786f 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -596,7 +596,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( InJoinSubqueriesPreprocessor(context).visit(query); /// Optimizes logical expressions. 
- LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length).perform(); + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.getValue()).perform(); } /// Creates a dictionary `aliases`: alias -> ASTPtr diff --git a/dbms/src/Interpreters/ThreadStatusExt.cpp b/dbms/src/Interpreters/ThreadStatusExt.cpp index 8c46a3ba08f..28740417b71 100644 --- a/dbms/src/Interpreters/ThreadStatusExt.cpp +++ b/dbms/src/Interpreters/ThreadStatusExt.cpp @@ -251,7 +251,7 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log) { elem.client_info = query_context->getClientInfo(); - if (query_context->getSettingsRef().log_profile_events.value != 0) + if (query_context->getSettingsRef().log_profile_events != 0) { /// NOTE: Here we are in the same thread, so we can make memcpy() elem.profile_counters = std::make_shared(performance_counters.getPartiallyAtomicSnapshot()); diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 47f4fc8c610..2d431ed1368 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -199,7 +199,7 @@ BufferPtr StorageKafka::createBuffer() const Settings & settings = global_context.getSettingsRef(); size_t batch_size = max_block_size; if (!batch_size) - batch_size = settings.max_block_size.value; + batch_size = settings.max_block_size; size_t poll_timeout = settings.stream_poll_timeout_ms.totalMilliseconds(); return std::make_shared( @@ -350,7 +350,7 @@ bool StorageKafka::streamToViews() const Settings & settings = global_context.getSettingsRef(); size_t block_size = max_block_size; if (block_size == 0) - block_size = settings.max_block_size.value; + block_size = settings.max_block_size; // Create a stream for each consumer and join them in a union stream InterpreterInsertQuery interpreter{insert, global_context}; @@ -436,7 +436,7 @@ void registerStorageKafka(StorageFactory & factory) #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME) \ /* One of the four required arguments is not specified */ \ if (args_count < ARG_NUM && ARG_NUM <= 4 && \ - !kafka_settings.PAR_NAME.changed) \ + !kafka_settings.PAR_NAME.isChanged()) \ { \ throw Exception( \ "Required parameter '" #PAR_NAME "' " \ @@ -445,7 +445,7 @@ void registerStorageKafka(StorageFactory & factory) } \ /* The same argument is given in two places */ \ if (has_settings && \ - kafka_settings.PAR_NAME.changed && \ + kafka_settings.PAR_NAME.isChanged() && \ args_count >= ARG_NUM) \ { \ throw Exception( \ @@ -469,7 +469,7 @@ void registerStorageKafka(StorageFactory & factory) #undef CHECK_KAFKA_STORAGE_ARGUMENT // Get and check broker list - String brokers = kafka_settings.kafka_broker_list.value; + String brokers = kafka_settings.kafka_broker_list; if (args_count >= 1) { const auto * ast = engine_args[0]->as(); @@ -524,7 +524,7 @@ void registerStorageKafka(StorageFactory & factory) } // Parse row delimiter (optional) - char row_delimiter = kafka_settings.kafka_row_delimiter.value; + char row_delimiter = kafka_settings.kafka_row_delimiter; if (args_count >= 5) { engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context); @@ -571,7 +571,7 @@ void registerStorageKafka(StorageFactory & factory) } // Parse number of consumers (optional) - UInt64 num_consumers = kafka_settings.kafka_num_consumers.value; + UInt64 num_consumers = kafka_settings.kafka_num_consumers; if (args_count >= 7) { const auto * ast = 
engine_args[6]->as(); @@ -586,7 +586,7 @@ void registerStorageKafka(StorageFactory & factory) } // Parse max block size (optional) - UInt64 max_block_size = static_cast(kafka_settings.kafka_max_block_size.value); + UInt64 max_block_size = static_cast(kafka_settings.kafka_max_block_size); if (args_count >= 8) { const auto * ast = engine_args[7]->as(); @@ -601,7 +601,7 @@ void registerStorageKafka(StorageFactory & factory) } } - size_t skip_broken = static_cast(kafka_settings.kafka_skip_broken_messages.value); + size_t skip_broken = static_cast(kafka_settings.kafka_skip_broken_messages); if (args_count >= 9) { const auto * ast = engine_args[8]->as(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 54da38df541..6108704b45a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -100,8 +100,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() std::sort(entries.begin(), entries.end()); String min_saved_record_log_str = entries[ - entries.size() > storage.settings.max_replicated_logs_to_keep.value - ? entries.size() - storage.settings.max_replicated_logs_to_keep.value + entries.size() > storage.settings.max_replicated_logs_to_keep + ? entries.size() - storage.settings.max_replicated_logs_to_keep : 0]; /// Replicas that were marked is_lost but are active. @@ -203,7 +203,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate); /// We will not touch the last `min_replicated_logs_to_keep` records. - entries.erase(entries.end() - std::min(entries.size(), storage.settings.min_replicated_logs_to_keep.value), entries.end()); + entries.erase(entries.end() - std::min(entries.size(), storage.settings.min_replicated_logs_to_keep), entries.end()); /// We will not touch records that are no less than `min_saved_log_pointer`. 
entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end()); @@ -299,7 +299,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() /// Virtual node, all nodes that are "greater" than this one will be deleted NodeWithStat block_threshold{{}, time_threshold}; - size_t current_deduplication_window = std::min(timed_blocks.size(), storage.settings.replicated_deduplication_window.value); + size_t current_deduplication_window = std::min(timed_blocks.size(), storage.settings.replicated_deduplication_window); auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window; auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime); auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold); diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 88a7c07377c..62638e1982a 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -329,7 +329,7 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const Context & c const auto & settings = context.getSettingsRef(); /// Ban an attempt to make async insert into the table belonging to DatabaseMemory - if (path.empty() && !owned_cluster && !settings.insert_distributed_sync.value) + if (path.empty() && !owned_cluster && !settings.insert_distributed_sync) { throw Exception("Storage " + getName() + " must has own data directory to enable asynchronous inserts", ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 3c90917b0f6..8d957a1c9cf 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -164,8 +164,8 @@ void registerStorageJoin(StorageFactory & factory) args.database_name, args.table_name, key_names, - join_use_nulls.value, - SizeLimits{max_rows_in_join.value, max_bytes_in_join.value, join_overflow_mode.value}, + join_use_nulls, + SizeLimits{max_rows_in_join, max_bytes_in_join, join_overflow_mode}, kind, strictness, args.columns, From 8e72d4c2ec1181dd84aea79227bf7732f6594339 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 11:35:49 +0300 Subject: [PATCH 17/59] Tryin COW settings --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 56 +++++++++++-------- dbms/src/Storages/MergeTree/MergeTreeData.h | 18 ++++-- .../Storages/MergeTree/MergeTreeSettings.cpp | 5 ++ .../Storages/MergeTree/MergeTreeSettings.h | 11 +++- .../MergeTree/registerStorageMergeTree.cpp | 6 +- 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b0453b04c8a..e6a8f2f7ba1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -104,7 +104,7 @@ MergeTreeData::MergeTreeData( const ASTPtr & sample_by_ast_, const ASTPtr & ttl_table_ast_, const MergingParams & merging_params_, - const MergeTreeSettings & settings_, + MergeTreeSettingsPtr settings_, bool require_part_metadata_, bool attach, BrokenPartCallback broken_part_callback_) @@ -132,7 +132,7 @@ MergeTreeData::MergeTreeData( sampling_expr_column_name = sample_by_ast->getColumnName(); if (!primary_key_sample.has(sampling_expr_column_name) - && !attach && !settings.compatibility_allow_sampling_expression_not_in_primary_key) /// 
This is for backward compatibility. + && !attach && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); auto syntax = SyntaxAnalyzer(global_context).analyze(sample_by_ast, getColumns().getAllPhysical()); @@ -727,6 +727,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) { LOG_DEBUG(log, "Loading data parts"); + auto settings_ptr = getImmutableSettings(); Strings part_file_names; Poco::DirectoryIterator end; for (Poco::DirectoryIterator it(full_path); it != end; ++it) @@ -843,12 +844,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); } - if (has_non_adaptive_parts && has_adaptive_parts && !settings.enable_mixed_granularity_parts) + if (has_non_adaptive_parts && has_adaptive_parts && !settings_ptr->enable_mixed_granularity_parts) throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR); has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; - if (suspicious_broken_parts > settings.max_suspicious_broken_parts && !skip_sanity_checks) + if (suspicious_broken_parts > settings_ptr->max_suspicious_broken_parts && !skip_sanity_checks) throw Exception("Suspiciously many (" + toString(suspicious_broken_parts) + ") broken parts to remove.", ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS); @@ -938,7 +939,7 @@ void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_life time_t current_time = time(nullptr); ssize_t deadline = (custom_directories_lifetime_seconds >= 0) ? current_time - custom_directories_lifetime_seconds - : current_time - settings.temporary_directories_lifetime.totalSeconds(); + : current_time - settings_ptr->temporary_directories_lifetime.totalSeconds(); /// Delete temporary directories older than a day. Poco::DirectoryIterator end; @@ -989,7 +990,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts() if (part.unique() && /// Grab only parts that are not used by anyone (SELECTs for example). part_remove_time < now && - now - part_remove_time > settings.old_parts_lifetime.totalSeconds()) + now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds()) { parts_to_delete.emplace_back(it); } @@ -1290,7 +1291,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name if (part) part_mrk_file_extension = part->index_granularity_info.marks_file_extension; else - part_mrk_file_extension = settings.index_granularity_bytes == 0 ? getNonAdaptiveMrkExtension() : getAdaptiveMrkExtension(); + part_mrk_file_extension = mutable_settings->index_granularity_bytes == 0 ? 
getNonAdaptiveMrkExtension() : getAdaptiveMrkExtension(); using NameToType = std::map; NameToType new_types; @@ -1448,6 +1449,7 @@ void MergeTreeData::alterDataPart( bool skip_sanity_checks, AlterDataPartTransactionPtr & transaction) { + auto settings = getImmutableSettings(); ExpressionActionsPtr expression; const auto & part = transaction->getDataPart(); bool force_update_metadata; @@ -1463,12 +1465,12 @@ void MergeTreeData::alterDataPart( ++num_files_to_remove; if (!skip_sanity_checks - && (num_files_to_modify > settings.max_files_to_modify_in_alter_columns - || num_files_to_remove > settings.max_files_to_remove_in_alter_columns)) + && (num_files_to_modify > settings->max_files_to_modify_in_alter_columns + || num_files_to_remove > settings->max_files_to_remove_in_alter_columns)) { transaction->clear(); - const bool forbidden_because_of_modify = num_files_to_modify > settings.max_files_to_modify_in_alter_columns; + const bool forbidden_because_of_modify = num_files_to_modify > settings->max_files_to_modify_in_alter_columns; std::stringstream exception_message; exception_message @@ -1500,7 +1502,7 @@ void MergeTreeData::alterDataPart( << " If it is not an error, you could increase merge_tree/" << (forbidden_because_of_modify ? "max_files_to_modify_in_alter_columns" : "max_files_to_remove_in_alter_columns") << " parameter in configuration file (current value: " - << (forbidden_because_of_modify ? settings.max_files_to_modify_in_alter_columns : settings.max_files_to_remove_in_alter_columns) + << (forbidden_because_of_modify ? settings->max_files_to_modify_in_alter_columns : settings->max_files_to_remove_in_alter_columns) << ")"; throw Exception(exception_message.str(), ErrorCodes::TABLE_DIFFERS_TOO_MUCH); @@ -1590,13 +1592,17 @@ void MergeTreeData::alterSettings( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - settings.updateFromChanges(new_changes); - IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); + { + MutableMergeTreeSettingsPtr settings = std::move(*mutable_settings).mutate(); + settings->updateFromChanges(new_changes); + IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); + mutable_settings = std::move(settings); + } } bool MergeTreeData::hasSetting(const String & setting_name) const { - return settings.findIndex(setting_name) != MergeTreeSettings::npos; + return MergeTreeSettings::findIndex(setting_name) != MergeTreeSettings::npos; } void MergeTreeData::removeEmptyColumnsFromPart(MergeTreeData::MutableDataPartPtr & data_part) @@ -2231,26 +2237,27 @@ std::optional MergeTreeData::getMinPartDataVersion() const void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const { + auto settings_ptr = getImmutableSettings(); const size_t parts_count_in_total = getPartsCount(); - if (parts_count_in_total >= settings.max_parts_in_total) + if (parts_count_in_total >= settings_ptr->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. 
The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); } const size_t parts_count_in_partition = getMaxPartsCountForPartition(); - if (parts_count_in_partition < settings.parts_to_delay_insert) + if (parts_count_in_partition < settings_ptr->parts_to_delay_insert) return; - if (parts_count_in_partition >= settings.parts_to_throw_insert) + if (parts_count_in_partition >= settings_ptr->parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_partition) + "). Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS); } - const size_t max_k = settings.parts_to_throw_insert - settings.parts_to_delay_insert; /// always > 0 - const size_t k = 1 + parts_count_in_partition - settings.parts_to_delay_insert; /// from 1 to max_k - const double delay_milliseconds = ::pow(settings.max_delay_to_insert * 1000, static_cast(k) / max_k); + const size_t max_k = settings_ptr->parts_to_throw_insert - settings_ptr->parts_to_delay_insert; /// always > 0 + const size_t k = 1 + parts_count_in_partition - settings_ptr->parts_to_delay_insert; /// from 1 to max_k + const double delay_milliseconds = ::pow(settings_ptr->max_delay_to_insert * 1000, static_cast(k) / max_k); ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); @@ -2268,8 +2275,9 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const void MergeTreeData::throwInsertIfNeeded() const { + auto settings_ptr = getImmutableSettings(); const size_t parts_count_in_total = getPartsCount(); - if (parts_count_in_total >= settings.max_parts_in_total) + if (parts_count_in_total >= settings_ptr->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); @@ -2277,7 +2285,7 @@ void MergeTreeData::throwInsertIfNeeded() const const size_t parts_count_in_partition = getMaxPartsCountForPartition(); - if (parts_count_in_partition >= settings.parts_to_throw_insert) + if (parts_count_in_partition >= settings_ptr->parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_partition) + "). 
Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS); @@ -2860,7 +2868,9 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const { - if (!settings.enable_mixed_granularity_parts || settings.index_granularity_bytes == 0) + auto settings_ptr = getImmutableSettings(); + + if (!settings_ptr->enable_mixed_granularity_parts || settings_ptr->index_granularity_bytes == 0) { if (!canUseAdaptiveGranularity() && src_part->index_granularity_info.is_adaptive) return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 756c188c724..38e58f3a6da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -313,7 +313,7 @@ public: const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. const ASTPtr & ttl_table_ast_, const MergingParams & merging_params_, - const MergeTreeSettings & settings_, + MergeTreeSettingsPtr settings_, bool require_part_metadata_, bool attach, BrokenPartCallback broken_part_callback_ = [](const String &){}); @@ -591,8 +591,9 @@ public: bool canUseAdaptiveGranularity() const { - return settings.index_granularity_bytes != 0 && - (settings.enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); + auto settings_ptr = getImmutableSettings(); + return settings_ptr->index_granularity_bytes != 0 && + (settings_ptr->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); } @@ -645,8 +646,6 @@ public: String sampling_expr_column_name; Names columns_required_for_sampling; - MergeTreeSettings settings; - /// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; @@ -655,7 +654,13 @@ public: bool has_non_adaptive_index_granularity_parts = false; + MergeTreeSettingsPtr getImmutableSettings() const + { + return mutable_settings; + } + protected: + friend struct MergeTreeDataPart; friend class MergeTreeDataMergerMutator; friend class ReplicatedMergeTreeAlterThread; @@ -683,6 +688,9 @@ protected: String log_name; Logger * log; + /// Settings COW pointer. Data maybe changed at any point of time. + /// If you need consistent settings, just copy pointer to your scope. + MergeTreeSettingsPtr mutable_settings; /// Work with data parts diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index 425a0851d67..3b90bf293ba 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -75,4 +75,9 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) #undef ADD_IF_ABSENT } + +MergeTreeSettings * MergeTreeSettings::clone() const +{ + return new MergeTreeSettings(*this); +} } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index dc94ad5b11f..54416f934a6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -2,6 +2,7 @@ #include #include +#include namespace Poco @@ -21,9 +22,11 @@ class ASTStorage; /** Settings for the MergeTree family of engines. * Could be loaded from config or from a CREATE TABLE query (SETTINGS clause). 
*/ -struct MergeTreeSettings : public SettingsCollection +struct MergeTreeSettings : public SettingsCollection, public COW { + friend class COW; + /// M (mutable) for normal settings, IM (immutable) for not updateable settings. #define LIST_OF_MERGE_TREE_SETTINGS(M, IM) \ IM(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ @@ -93,6 +96,12 @@ struct MergeTreeSettings : public SettingsCollection /// NOTE: will rewrite the AST to add immutable settings. void loadFromQuery(ASTStorage & storage_def); + + MergeTreeSettings * clone() const; + ~MergeTreeSettings() {} }; +using MergeTreeSettingsPtr = MergeTreeSettings::Ptr; +using MutableMergeTreeSettingsPtr = MergeTreeSettings::MutablePtr; + } diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 138e7c14f9d..a8e2f137e1a 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -574,7 +574,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr sample_by_ast; ASTPtr ttl_table_ast; IndicesDescription indices_description; - MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); + MutableMergeTreeSettingsPtr storage_settings = MergeTreeSettings::create(args.context.getMergeTreeSettings()); if (is_extended_storage_def) { @@ -603,7 +603,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) std::dynamic_pointer_cast(index->clone())); - storage_settings.loadFromQuery(*args.storage_def); + storage_settings->loadFromQuery(*args.storage_def); } else { @@ -625,7 +625,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) const auto * ast = engine_args.back()->as(); if (ast && ast->value.getType() == Field::Types::UInt64) - storage_settings.index_granularity = safeGet(ast->value); + storage_settings->index_granularity = safeGet(ast->value); else throw Exception( "Index granularity must be a positive integer" + getMergeTreeVerboseHelp(is_extended_storage_def), From ad81c743c186cff9d2e0e5ff1019f9be44088392 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 13:29:31 +0300 Subject: [PATCH 18/59] Buildable code --- .../Storages/MergeTree/DataPartsExchange.cpp | 8 ++- .../MergeTree/IMergedBlockOutputStream.cpp | 7 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 54 ++++++++------- dbms/src/Storages/MergeTree/MergeTreeData.h | 36 +++++++--- .../MergeTree/MergeTreeDataMergerMutator.cpp | 32 +++++---- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 21 +++--- .../MergeTreeIndexGranularityInfo.cpp | 5 +- .../Storages/MergeTree/MergeTreeSettings.cpp | 4 +- .../Storages/MergeTree/MergeTreeSettings.h | 2 +- .../MergeTreeThreadSelectBlockInputStream.cpp | 2 +- .../ReplicatedMergeTreeCleanupThread.cpp | 30 +++++---- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 3 +- .../ReplicatedMergeTreeRestartingThread.cpp | 17 +++-- .../ReplicatedMergeTreeTableMetadata.cpp | 5 +- .../MergeTree/registerStorageMergeTree.cpp | 4 +- dbms/src/Storages/StorageMergeTree.cpp | 11 +-- dbms/src/Storages/StorageMergeTree.h | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 67 +++++++++++-------- .../src/Storages/StorageReplicatedMergeTree.h | 2 +- 19 files changed, 186 insertions(+), 126 deletions(-) diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index f01b384d441..795cc68f1ea 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ 
b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -54,14 +54,15 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); String part_name = params.get("part"); + const auto data_settings = data.getCOWSettings(); /// Validation of the input that may come from malicious replica. MergeTreePartInfo::fromPartName(part_name, data.format_version); static std::atomic_uint total_sends {0}; - if ((data.settings.replicated_max_parallel_sends && total_sends >= data.settings.replicated_max_parallel_sends) - || (data.settings.replicated_max_parallel_sends_for_table && data.current_table_sends >= data.settings.replicated_max_parallel_sends_for_table)) + if ((data_settings->replicated_max_parallel_sends && total_sends >= data_settings->replicated_max_parallel_sends) + || (data_settings->replicated_max_parallel_sends_for_table && data.current_table_sends >= data_settings->replicated_max_parallel_sends_for_table)) { response.setStatus(std::to_string(HTTP_TOO_MANY_REQUESTS)); response.setReason("Too many concurrent fetches, try again later"); @@ -174,6 +175,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( { /// Validation of the input that may come from malicious replica. MergeTreePartInfo::fromPartName(part_name, data.format_version); + const auto data_settings = data.getCOWSettings(); Poco::URI uri; uri.setScheme(interserver_scheme); @@ -200,7 +202,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, - data.settings.replicated_max_parallel_fetches_for_host + data_settings->replicated_max_parallel_fetches_for_host }; static const String TMP_PREFIX = "tmp_fetch_"; diff --git a/dbms/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 2bd0ebb61ea..a20b03cc7b0 100644 --- a/dbms/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -32,7 +32,7 @@ IMergedBlockOutputStream::IMergedBlockOutputStream( , index_granularity(index_granularity_) , compute_granularity(index_granularity.empty()) , codec(std::move(codec_)) - , with_final_mark(storage.settings.write_final_mark && storage.canUseAdaptiveGranularity()) + , with_final_mark(storage.getCOWSettings()->write_final_mark && storage.canUseAdaptiveGranularity()) { if (blocks_are_granules_size && !index_granularity.empty()) throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR); @@ -133,10 +133,11 @@ void fillIndexGranularityImpl( void IMergedBlockOutputStream::fillIndexGranularity(const Block & block) { + const auto storage_settings = storage.getCOWSettings(); fillIndexGranularityImpl( block, - storage.settings.index_granularity_bytes, - storage.settings.index_granularity, + storage_settings->index_granularity_bytes, + storage_settings->index_granularity, blocks_are_granules_size, index_offset, index_granularity, diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f8ff2ab87b1..4d47f60a2fe 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -110,7 +110,6 @@ MergeTreeData::MergeTreeData( BrokenPartCallback broken_part_callback_) : global_context(context_), merging_params(merging_params_), - settings(settings_), partition_by_ast(partition_by_ast_), 
sample_by_ast(sample_by_ast_), ttl_table_ast(ttl_table_ast_), @@ -119,9 +118,11 @@ MergeTreeData::MergeTreeData( full_path(full_path_), broken_part_callback(broken_part_callback_), log_name(database_name + "." + table_name), log(&Logger::get(log_name)), + guarded_settings(std::move(settings_)), data_parts_by_info(data_parts_indexes.get()), data_parts_by_state_and_info(data_parts_indexes.get()) { + const auto settings = getCOWSettings(); setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_); /// NOTE: using the same columns list as is read when performing actual merges. @@ -725,7 +726,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) { LOG_DEBUG(log, "Loading data parts"); - auto settings_ptr = getImmutableSettings(); + const auto settings = getCOWSettings(); Strings part_file_names; Poco::DirectoryIterator end; for (Poco::DirectoryIterator it(full_path); it != end; ++it) @@ -747,7 +748,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } /// Parallel loading of data parts. - size_t num_threads = std::min(size_t(settings.max_part_loading_threads), part_file_names.size()); + size_t num_threads = std::min(size_t(settings->max_part_loading_threads), part_file_names.size()); std::mutex mutex; @@ -866,12 +867,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) pool.wait(); - if (has_non_adaptive_parts && has_adaptive_parts && !settings_ptr->enable_mixed_granularity_parts) + if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR); has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; - if (suspicious_broken_parts > settings_ptr->max_suspicious_broken_parts && !skip_sanity_checks) + if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) throw Exception("Suspiciously many (" + toString(suspicious_broken_parts) + ") broken parts to remove.", ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS); @@ -958,10 +959,11 @@ void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_life if (!lock.try_lock()) return; + const auto settings = getCOWSettings(); time_t current_time = time(nullptr); ssize_t deadline = (custom_directories_lifetime_seconds >= 0) ? current_time - custom_directories_lifetime_seconds - : current_time - settings_ptr->temporary_directories_lifetime.totalSeconds(); + : current_time - settings->temporary_directories_lifetime.totalSeconds(); /// Delete temporary directories older than a day. Poco::DirectoryIterator end; @@ -1012,7 +1014,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts() if (part.unique() && /// Grab only parts that are not used by anyone (SELECTs for example). 
part_remove_time < now && - now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds()) + now - part_remove_time > getCOWSettings()->old_parts_lifetime.totalSeconds()) { parts_to_delete.emplace_back(it); } @@ -1096,11 +1098,12 @@ void MergeTreeData::clearOldPartsFromFilesystem() void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts_to_remove) { - if (parts_to_remove.size() > 1 && settings.max_part_removal_threads > 1 && parts_to_remove.size() > settings.concurrent_part_removal_threshold) + const auto settings = getCOWSettings(); + if (parts_to_remove.size() > 1 && settings->max_part_removal_threads > 1 && parts_to_remove.size() > settings->concurrent_part_removal_threshold) { /// Parallel parts removal. - size_t num_threads = std::min(size_t(settings.max_part_removal_threads), parts_to_remove.size()); + size_t num_threads = std::min(size_t(settings->max_part_removal_threads), parts_to_remove.size()); ThreadPool pool(num_threads); /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. @@ -1332,6 +1335,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name const IndicesASTs & old_indices, const IndicesASTs & new_indices, ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const { + const auto settings = getCOWSettings(); out_expression = nullptr; out_rename_map = {}; out_force_update_metadata = false; @@ -1339,7 +1343,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name if (part) part_mrk_file_extension = part->index_granularity_info.marks_file_extension; else - part_mrk_file_extension = mutable_settings->index_granularity_bytes == 0 ? getNonAdaptiveMrkExtension() : getAdaptiveMrkExtension(); + part_mrk_file_extension = settings->index_granularity_bytes == 0 ? getNonAdaptiveMrkExtension() : getAdaptiveMrkExtension(); using NameToType = std::map; NameToType new_types; @@ -1497,7 +1501,7 @@ void MergeTreeData::alterDataPart( bool skip_sanity_checks, AlterDataPartTransactionPtr & transaction) { - auto settings = getImmutableSettings(); + const auto settings = getCOWSettings(); ExpressionActionsPtr expression; const auto & part = transaction->getDataPart(); bool force_update_metadata; @@ -1641,10 +1645,10 @@ void MergeTreeData::alterSettings( TableStructureWriteLockHolder & table_lock_holder) { { - MutableMergeTreeSettingsPtr settings = std::move(*mutable_settings).mutate(); + MutableMergeTreeSettingsPtr settings = std::move(*guarded_settings.getPtr()).mutate(); settings->updateFromChanges(new_changes); IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); - mutable_settings = std::move(settings); + guarded_settings.setPtr(std::move(settings)); } } @@ -2285,27 +2289,27 @@ std::optional MergeTreeData::getMinPartDataVersion() const void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const { - auto settings_ptr = getImmutableSettings(); + const auto settings = getCOWSettings(); const size_t parts_count_in_total = getPartsCount(); - if (parts_count_in_total >= settings_ptr->max_parts_in_total) + if (parts_count_in_total >= settings->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. 
The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); } const size_t parts_count_in_partition = getMaxPartsCountForPartition(); - if (parts_count_in_partition < settings_ptr->parts_to_delay_insert) + if (parts_count_in_partition < settings->parts_to_delay_insert) return; - if (parts_count_in_partition >= settings_ptr->parts_to_throw_insert) + if (parts_count_in_partition >= settings->parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_partition) + "). Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS); } - const size_t max_k = settings_ptr->parts_to_throw_insert - settings_ptr->parts_to_delay_insert; /// always > 0 - const size_t k = 1 + parts_count_in_partition - settings_ptr->parts_to_delay_insert; /// from 1 to max_k - const double delay_milliseconds = ::pow(settings_ptr->max_delay_to_insert * 1000, static_cast(k) / max_k); + const size_t max_k = settings->parts_to_throw_insert - settings->parts_to_delay_insert; /// always > 0 + const size_t k = 1 + parts_count_in_partition - settings->parts_to_delay_insert; /// from 1 to max_k + const double delay_milliseconds = ::pow(settings->max_delay_to_insert * 1000, static_cast(k) / max_k); ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); @@ -2323,9 +2327,9 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const void MergeTreeData::throwInsertIfNeeded() const { - auto settings_ptr = getImmutableSettings(); + const auto settings = getCOWSettings(); const size_t parts_count_in_total = getPartsCount(); - if (parts_count_in_total >= settings_ptr->max_parts_in_total) + if (parts_count_in_total >= settings->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); @@ -2333,7 +2337,7 @@ void MergeTreeData::throwInsertIfNeeded() const const size_t parts_count_in_partition = getMaxPartsCountForPartition(); - if (parts_count_in_partition >= settings_ptr->parts_to_throw_insert) + if (parts_count_in_partition >= settings->parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception("Too many parts (" + toString(parts_count_in_partition) + "). 
Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS); @@ -2916,9 +2920,9 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const { - auto settings_ptr = getImmutableSettings(); + const auto settings = getCOWSettings(); - if (!settings_ptr->enable_mixed_granularity_parts || settings_ptr->index_granularity_bytes == 0) + if (!settings->enable_mixed_granularity_parts || settings->index_granularity_bytes == 0) { if (!canUseAdaptiveGranularity() && src_part->index_granularity_info.is_adaptive) return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index c07e2fc34f4..b94555477f9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -594,9 +594,9 @@ public: /// Has additional constraint in replicated version virtual bool canUseAdaptiveGranularity() const { - auto settings_ptr = getImmutableSettings(); - return settings_ptr->index_granularity_bytes != 0 && - (settings_ptr->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); + const auto settings = getCOWSettings(); + return settings->index_granularity_bytes != 0 && + (settings->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); } @@ -657,9 +657,12 @@ public: bool has_non_adaptive_index_granularity_parts = false; - MergeTreeSettingsPtr getImmutableSettings() const + /// Get copy-on-write pointer to storage settings. + /// Copy this pointer into your scope and you will + /// get consistent settings. + const MergeTreeSettingsPtr getCOWSettings() const { - return mutable_settings; + return guarded_settings.copyPtr(); } protected: @@ -691,9 +694,26 @@ protected: String log_name; Logger * log; - /// Settings COW pointer. Data maybe changed at any point of time. - /// If you need consistent settings, just copy pointer to your scope. - MergeTreeSettingsPtr mutable_settings; + /// Just hides settings pointer from direct usage + class MergeTreeSettingsGuard + { + private: + /// Settings COW pointer. Data maybe changed at any point of time. + /// If you need consistent settings, just copy pointer to your scope. + MergeTreeSettingsPtr settings_ptr; + public: + MergeTreeSettingsGuard(MergeTreeSettingsPtr settings_ptr_) + : settings_ptr(settings_ptr_) + {} + + const MergeTreeSettingsPtr copyPtr() const { return settings_ptr; } + MergeTreeSettingsPtr & getPtr() { return settings_ptr; } + void setPtr(MergeTreeSettingsPtr ptr) { settings_ptr = ptr; } + }; + + /// Storage settings. Don't use this field directly, if you + /// want readonly settings. Prefer getCOWSettings() method. 
+ MergeTreeSettingsGuard guarded_settings; /// Work with data parts diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 3a4d4038317..fad63f35aec 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -141,15 +141,16 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz throw Exception("Logical error: invalid arguments passed to getMaxSourcePartsSize: pool_used > pool_size", ErrorCodes::LOGICAL_ERROR); size_t free_entries = pool_size - pool_used; + const auto data_settings = data.getCOWSettings(); UInt64 max_size = 0; - if (free_entries >= data.settings.number_of_free_entries_in_pool_to_lower_max_size_of_merge) - max_size = data.settings.max_bytes_to_merge_at_max_space_in_pool; + if (free_entries >= data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge) + max_size = data_settings->max_bytes_to_merge_at_max_space_in_pool; else max_size = interpolateExponential( - data.settings.max_bytes_to_merge_at_min_space_in_pool, - data.settings.max_bytes_to_merge_at_max_space_in_pool, - static_cast(free_entries) / data.settings.number_of_free_entries_in_pool_to_lower_max_size_of_merge); + data_settings->max_bytes_to_merge_at_min_space_in_pool, + data_settings->max_bytes_to_merge_at_max_space_in_pool, + static_cast(free_entries) / data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge); return std::min(max_size, static_cast(DiskSpaceMonitor::getUnreservedFreeSpace(data.full_path) / DISK_USAGE_COEFFICIENT_TO_SELECT)); } @@ -169,6 +170,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( String * out_disable_reason) { MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector(); + const auto data_settings = data.getCOWSettings(); if (data_parts.empty()) { @@ -223,7 +225,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( merge_settings.base = 1; bool can_merge_with_ttl = - (current_time - last_merge_with_ttl > data.settings.merge_with_ttl_timeout); + (current_time - last_merge_with_ttl > data_settings->merge_with_ttl_timeout); /// NOTE Could allow selection of different merge strategy. 
if (can_merge_with_ttl && has_part_with_expired_ttl && !ttl_merges_blocker.isCancelled()) @@ -545,6 +547,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor Names all_column_names = data.getColumns().getNamesOfPhysical(); NamesAndTypesList all_columns = data.getColumns().getAllPhysical(); + const auto data_settings = data.getCOWSettings(); NamesAndTypesList gathering_columns, merging_columns; Names gathering_column_names, merging_column_names; @@ -617,13 +620,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// We count total amount of bytes in parts /// and use direct_io + aio if there is more than min_merge_bytes_to_use_direct_io bool read_with_direct_io = false; - if (data.settings.min_merge_bytes_to_use_direct_io != 0) + if (data_settings->min_merge_bytes_to_use_direct_io != 0) { size_t total_size = 0; for (const auto & part : parts) { total_size += part->bytes_on_disk; - if (total_size >= data.settings.min_merge_bytes_to_use_direct_io) + if (total_size >= data_settings->min_merge_bytes_to_use_direct_io) { LOG_DEBUG(log, "Will merge parts reading files in O_DIRECT"); read_with_direct_io = true; @@ -720,7 +723,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merging_columns, compression_codec, merged_column_to_size, - data.settings.min_merge_bytes_to_use_direct_io, + data_settings->min_merge_bytes_to_use_direct_io, blocks_are_granules_size}; merged_stream->readPrefix(); @@ -938,6 +941,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor auto in = mutations_interpreter.execute(); NamesAndTypesList all_columns = data.getColumns().getAllPhysical(); + const auto data_settings = data.getCOWSettings(); Block in_header = in->getHeader(); @@ -995,7 +999,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor } NameSet files_to_skip = {"checksums.txt", "columns.txt"}; - auto mrk_extension = data.settings.index_granularity_bytes ? getAdaptiveMrkExtension() : getNonAdaptiveMrkExtension(); + auto mrk_extension = data_settings->index_granularity_bytes ? 
getAdaptiveMrkExtension() : getNonAdaptiveMrkExtension(); for (const auto & entry : in_header) { IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) @@ -1092,9 +1096,11 @@ MergeTreeDataMergerMutator::MergeAlgorithm MergeTreeDataMergerMutator::chooseMer const MergeTreeData::DataPartsVector & parts, size_t sum_rows_upper_bound, const NamesAndTypesList & gathering_columns, bool deduplicate, bool need_remove_expired_values) const { + const auto data_settings = data.getCOWSettings(); + if (deduplicate) return MergeAlgorithm::Horizontal; - if (data.settings.enable_vertical_merge_algorithm == 0) + if (data_settings->enable_vertical_merge_algorithm == 0) return MergeAlgorithm::Horizontal; if (need_remove_expired_values) return MergeAlgorithm::Horizontal; @@ -1105,9 +1111,9 @@ MergeTreeDataMergerMutator::MergeAlgorithm MergeTreeDataMergerMutator::chooseMer data.merging_params.mode == MergeTreeData::MergingParams::Replacing || data.merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing; - bool enough_ordinary_cols = gathering_columns.size() >= data.settings.vertical_merge_algorithm_min_columns_to_activate; + bool enough_ordinary_cols = gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; - bool enough_total_rows = sum_rows_upper_bound >= data.settings.vertical_merge_algorithm_min_rows_to_activate; + bool enough_total_rows = sum_rows_upper_bound >= data_settings->vertical_merge_algorithm_min_rows_to_activate; bool no_parts_overflow = parts.size() <= RowSourcePart::MAX_PARTS; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 513a713651b..314967b485d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -672,6 +672,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( size_t sum_marks = 0; size_t total_rows = 0; + const auto data_settings = data.getCOWSettings(); size_t adaptive_parts = 0; for (size_t i = 0; i < parts.size(); ++i) { @@ -688,18 +689,18 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( size_t index_granularity_bytes = 0; if (adaptive_parts > parts.size() / 2) - index_granularity_bytes = data.settings.index_granularity_bytes; + index_granularity_bytes = data_settings->index_granularity_bytes; const size_t max_marks_to_use_cache = roundRowsOrBytesToMarks( settings.merge_tree_max_rows_to_use_cache, settings.merge_tree_max_bytes_to_use_cache, - data.settings.index_granularity, + data_settings->index_granularity, index_granularity_bytes); const size_t min_marks_for_concurrent_read = roundRowsOrBytesToMarks( settings.merge_tree_min_rows_for_concurrent_read, settings.merge_tree_min_bytes_for_concurrent_read, - data.settings.index_granularity, + data_settings->index_granularity, index_granularity_bytes); if (sum_marks > max_marks_to_use_cache) @@ -830,6 +831,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO SortingInfoPtr sorting_info = query_info.sorting_info; size_t adaptive_parts = 0; std::vector sum_marks_in_parts(parts.size()); + const auto data_settings = data.getCOWSettings(); /// In case of reverse order let's split ranges to avoid reading much data. 
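/// ------------------------------------------------------------------------------
/// Illustrative condensation, not a literal quote from the patch: chooseMergeAlgorithm()
/// above now reads both activation thresholds from the settings snapshot. The vertical
/// algorithm is picked only when every precondition holds; merge_mode_supports_vertical
/// is a placeholder for the Ordinary/Collapsing/Replacing/VersionedCollapsing check.

bool use_vertical_merge =
    !deduplicate
    && !need_remove_expired_values
    && data_settings->enable_vertical_merge_algorithm
    && merge_mode_supports_vertical
    && gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate
    && sum_rows_upper_bound >= data_settings->vertical_merge_algorithm_min_rows_to_activate
    && parts.size() <= RowSourcePart::MAX_PARTS;
/// ------------------------------------------------------------------------------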
auto split_ranges = [max_block_size](const auto & ranges, size_t rows_granularity, size_t num_marks_in_part) @@ -862,7 +864,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO sum_marks += sum_marks_in_parts[i]; if (sorting_info->direction == -1) - parts[i].ranges = split_ranges(parts[i].ranges, data.settings.index_granularity, sum_marks_in_parts[i]); + parts[i].ranges = split_ranges(parts[i].ranges, data_settings->index_granularity, sum_marks_in_parts[i]); /// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`. std::reverse(parts[i].ranges.begin(), parts[i].ranges.end()); @@ -873,18 +875,18 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO size_t index_granularity_bytes = 0; if (adaptive_parts > parts.size() / 2) - index_granularity_bytes = data.settings.index_granularity_bytes; + index_granularity_bytes = data_settings->index_granularity_bytes; const size_t max_marks_to_use_cache = roundRowsOrBytesToMarks( settings.merge_tree_max_rows_to_use_cache, settings.merge_tree_max_bytes_to_use_cache, - data.settings.index_granularity, + data_settings->index_granularity, index_granularity_bytes); const size_t min_marks_for_concurrent_read = roundRowsOrBytesToMarks( settings.merge_tree_min_rows_for_concurrent_read, settings.merge_tree_min_bytes_for_concurrent_read, - data.settings.index_granularity, + data_settings->index_granularity, index_granularity_bytes); if (sum_marks > max_marks_to_use_cache) @@ -1012,6 +1014,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal const Names & virt_columns, const Settings & settings) const { + const auto data_settings = data.getCOWSettings(); size_t sum_marks = 0; size_t adaptive_parts = 0; for (size_t i = 0; i < parts.size(); ++i) @@ -1025,12 +1028,12 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal size_t index_granularity_bytes = 0; if (adaptive_parts >= parts.size() / 2) - index_granularity_bytes = data.settings.index_granularity_bytes; + index_granularity_bytes = data_settings->index_granularity_bytes; const size_t max_marks_to_use_cache = roundRowsOrBytesToMarks( settings.merge_tree_max_rows_to_use_cache, settings.merge_tree_max_bytes_to_use_cache, - data.settings.index_granularity, + data_settings->index_granularity, index_granularity_bytes); if (sum_marks > max_marks_to_use_cache) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 63b19da9e64..143af37c10d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -25,12 +25,13 @@ std::optional MergeTreeIndexGranularityInfo::getMrkExtensionFromFS( MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo( const MergeTreeData & storage) { - fixed_index_granularity = storage.settings.index_granularity; + const auto storage_settings = storage.getCOWSettings(); + fixed_index_granularity = storage_settings->index_granularity; /// Granularity is fixed if (!storage.canUseAdaptiveGranularity()) setNonAdaptive(); else - setAdaptive(storage.settings.index_granularity_bytes); + setAdaptive(storage_settings->index_granularity_bytes); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index 3b90bf293ba..7c3c7ef02cc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -76,8 +76,8 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) } -MergeTreeSettings * MergeTreeSettings::clone() const +MergeTreeSettings::MutablePtr MergeTreeSettings::clone() const { - return new MergeTreeSettings(*this); + return COW::create(*this); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 480209c740a..dff7cc2e523 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -100,7 +100,7 @@ struct MergeTreeSettings : public SettingsCollection, public /// NOTE: will rewrite the AST to add immutable settings. void loadFromQuery(ASTStorage & storage_def); - MergeTreeSettings * clone() const; + MutablePtr clone() const; ~MergeTreeSettings() {} }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp index 9c34782dec8..7a09bde0998 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp @@ -31,7 +31,7 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( /// Maybe it will make sence to add settings `max_block_size_bytes` if (max_block_size_rows && !storage.canUseAdaptiveGranularity()) { - size_t fixed_index_granularity = storage.settings.index_granularity; + size_t fixed_index_granularity = storage.getCOWSettings()->index_granularity; min_marks_to_read = (min_marks_to_read_ * fixed_index_granularity + max_block_size_rows - 1) / max_block_size_rows * max_block_size_rows / fixed_index_granularity; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 6108704b45a..2b03ed86895 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -27,8 +27,9 @@ ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplic void ReplicatedMergeTreeCleanupThread::run() { - const auto CLEANUP_SLEEP_MS = storage.settings.cleanup_delay_period * 1000 - + std::uniform_int_distribution(0, storage.settings.cleanup_delay_period_random_add * 1000)(rng); + auto storage_settings = storage.getCOWSettings(); + const auto CLEANUP_SLEEP_MS = storage_settings->cleanup_delay_period * 1000 + + std::uniform_int_distribution(0, storage_settings->cleanup_delay_period_random_add * 1000)(rng); try { @@ -74,6 +75,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() void ReplicatedMergeTreeCleanupThread::clearOldLogs() { auto zookeeper = storage.getZooKeeper(); + auto storage_settings = storage.getCOWSettings(); Coordination::Stat stat; if (!zookeeper->exists(storage.zookeeper_path + "/log", &stat)) @@ -82,7 +84,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() int children_count = stat.numChildren; /// We will wait for 1.1 times more records to accumulate than necessary. 
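/// ------------------------------------------------------------------------------
/// Illustrative sketch, not part of the patch: with MergeTreeSettings::clone() now
/// returning a COW MutablePtr (see the MergeTreeSettings hunk above), a settings
/// change becomes a copy / mutate / publish cycle. The publish helper below is
/// hypothetical; in the patch the new pointer ends up in MergeTreeSettingsGuard
/// via setPtr() under the storage's lock.

void changeIndexGranularitySketch(MergeTreeData & storage, size_t new_granularity)
{
    auto copy = storage.getCOWSettings()->clone();   /// clone() makes a deep copy of the current snapshot
    copy->index_granularity = new_granularity;       /// mutate the private copy only
    storage.publishSettings(std::move(copy));        /// hypothetical: guard.setPtr(new snapshot)
}
/// ------------------------------------------------------------------------------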
- if (static_cast(children_count) < storage.settings.min_replicated_logs_to_keep * 1.1) + if (static_cast(children_count) < storage_settings->min_replicated_logs_to_keep * 1.1) return; Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat); @@ -100,8 +102,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() std::sort(entries.begin(), entries.end()); String min_saved_record_log_str = entries[ - entries.size() > storage.settings.max_replicated_logs_to_keep - ? entries.size() - storage.settings.max_replicated_logs_to_keep + entries.size() > storage_settings->max_replicated_logs_to_keep + ? entries.size() - storage_settings->max_replicated_logs_to_keep : 0]; /// Replicas that were marked is_lost but are active. @@ -203,7 +205,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate); /// We will not touch the last `min_replicated_logs_to_keep` records. - entries.erase(entries.end() - std::min(entries.size(), storage.settings.min_replicated_logs_to_keep), entries.end()); + entries.erase(entries.end() - std::min(entries.size(), storage_settings->min_replicated_logs_to_keep), entries.end()); /// We will not touch records that are no less than `min_saved_log_pointer`. entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end()); @@ -285,6 +287,7 @@ struct ReplicatedMergeTreeCleanupThread::NodeWithStat void ReplicatedMergeTreeCleanupThread::clearOldBlocks() { auto zookeeper = storage.getZooKeeper(); + auto storage_settings = storage.getCOWSettings(); std::vector timed_blocks; getBlocksSortedByTime(*zookeeper, timed_blocks); @@ -294,12 +297,12 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() /// Use ZooKeeper's first node (last according to time) timestamp as "current" time. Int64 current_time = timed_blocks.front().ctime; - Int64 time_threshold = std::max(static_cast(0), current_time - static_cast(1000 * storage.settings.replicated_deduplication_window_seconds)); + Int64 time_threshold = std::max(static_cast(0), current_time - static_cast(1000 * storage_settings->replicated_deduplication_window_seconds)); /// Virtual node, all nodes that are "greater" than this one will be deleted NodeWithStat block_threshold{{}, time_threshold}; - size_t current_deduplication_window = std::min(timed_blocks.size(), storage.settings.replicated_deduplication_window); + size_t current_deduplication_window = std::min(timed_blocks.size(), storage_settings->replicated_deduplication_window); auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window; auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime); auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold); @@ -401,10 +404,11 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & void ReplicatedMergeTreeCleanupThread::clearOldMutations() { - if (!storage.settings.finished_mutations_to_keep) + auto storage_settings = storage.getCOWSettings(); + if (!storage_settings->finished_mutations_to_keep) return; - if (storage.queue.countFinishedMutations() <= storage.settings.finished_mutations_to_keep) + if (storage.queue.countFinishedMutations() <= storage_settings->finished_mutations_to_keep) { /// Not strictly necessary, but helps to avoid unnecessary ZooKeeper requests. 
/// If even this replica hasn't finished enough mutations yet, then we don't need to clean anything. @@ -431,10 +435,10 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations() /// Do not remove entries that are greater than `min_pointer` (they are not done yet). entries.erase(std::upper_bound(entries.begin(), entries.end(), padIndex(min_pointer)), entries.end()); - /// Do not remove last `storage.settings.finished_mutations_to_keep` entries. - if (entries.size() <= storage.settings.finished_mutations_to_keep) + /// Do not remove last `storage_settings->finished_mutations_to_keep` entries. + if (entries.size() <= storage_settings->finished_mutations_to_keep) return; - entries.erase(entries.end() - storage.settings.finished_mutations_to_keep, entries.end()); + entries.erase(entries.end() - storage_settings->finished_mutations_to_keep, entries.end()); if (entries.empty()) return; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 665e8c9bd5c..5d044fbf839 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -961,7 +961,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( * (because it may be ordered by OPTIMIZE or early with differrent settings). */ UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(); - if (max_source_parts_size != data.settings.max_bytes_to_merge_at_max_space_in_pool + const auto data_settings = data.getCOWSettings(); + if (max_source_parts_size != data_settings->max_bytes_to_merge_at_max_space_in_pool && sum_parts_size_in_bytes > max_source_parts_size) { String reason = "Not executing log entry for part " + entry.new_part_name diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index a98625336c5..6145713492f 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -44,11 +44,12 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage , log(&Logger::get(log_name)) , active_node_identifier(generateActiveNodeIdentifier()) { - check_period_ms = storage.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000; + const auto storage_settings = storage.getCOWSettings(); + check_period_ms = storage_settings->zookeeper_session_expiration_check_period.totalSeconds() * 1000; /// Periodicity of checking lag of replica. - if (check_period_ms > static_cast(storage.settings.check_delay_period) * 1000) - check_period_ms = storage.settings.check_delay_period * 1000; + if (check_period_ms > static_cast(storage_settings->check_delay_period) * 1000) + check_period_ms = storage_settings->check_delay_period * 1000; task = storage.global_context.getSchedulePool().createTask(log_name, [this]{ run(); }); } @@ -121,7 +122,8 @@ void ReplicatedMergeTreeRestartingThread::run() } time_t current_time = time(nullptr); - if (current_time >= prev_time_of_check_delay + static_cast(storage.settings.check_delay_period)) + const auto storage_settings = storage.getCOWSettings(); + if (current_time >= prev_time_of_check_delay + static_cast(storage_settings->check_delay_period)) { /// Find out lag of replicas. time_t absolute_delay = 0; @@ -136,10 +138,10 @@ void ReplicatedMergeTreeRestartingThread::run() /// We give up leadership if the relative lag is greater than threshold. 
if (storage.is_leader - && relative_delay > static_cast(storage.settings.min_relative_delay_to_yield_leadership)) + && relative_delay > static_cast(storage_settings->min_relative_delay_to_yield_leadership)) { LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold (" - << storage.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership."); + << storage_settings->min_relative_delay_to_yield_leadership << "). Will yield leadership."); ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership); @@ -169,6 +171,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() activateReplica(); const auto & zookeeper = storage.getZooKeeper(); + const auto storage_settings = storage.getCOWSettings(); storage.cloneReplicaIfNeeded(zookeeper); @@ -181,7 +184,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() updateQuorumIfWeHavePart(); - if (storage.settings.replicated_can_become_leader) + if (storage_settings->replicated_can_become_leader) storage.enterLeaderElection(); else LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0"); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 10f77358b7d..381d1f47412 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -27,8 +27,9 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; + const auto data_settings = data.getCOWSettings(); sampling_expression = formattedAST(data.sample_by_ast); - index_granularity = data.settings.index_granularity; + index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; @@ -48,7 +49,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr ttl_table = formattedAST(data.ttl_table_ast); skip_indices = data.getIndices().toString(); if (data.canUseAdaptiveGranularity()) - index_granularity_bytes = data.settings.index_granularity_bytes; + index_granularity_bytes = data_settings->index_granularity_bytes; else index_granularity_bytes = 0; } diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 81ead0fd20d..1e9f1a252f4 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -641,13 +641,13 @@ static StoragePtr create(const StorageFactory::Arguments & args) zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, args.columns, indices_description, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, ttl_table_ast, merging_params, storage_settings, + sample_by_ast, ttl_table_ast, merging_params, std::move(storage_settings), args.has_force_restore_data_flag); else return StorageMergeTree::create( args.data_path, args.database_name, args.table_name, args.columns, indices_description, args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, - primary_key_ast, sample_by_ast, ttl_table_ast, merging_params, storage_settings, + primary_key_ast, sample_by_ast, ttl_table_ast, 
merging_params, std::move(storage_settings), args.has_force_restore_data_flag); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 9ed413157a2..f6975e34dfa 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -62,7 +62,7 @@ StorageMergeTree::StorageMergeTree( const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. const ASTPtr & ttl_table_ast_, const MergingParams & merging_params_, - const MergeTreeSettings & settings_, + MergeTreeSettingsPtr settings_, bool has_force_restore_data_flag) : MergeTreeData(database_name_, table_name_, path_ + escapeForFileName(table_name_) + '/', @@ -801,14 +801,15 @@ Int64 StorageMergeTree::getCurrentMutationVersion( void StorageMergeTree::clearOldMutations() { - if (!settings.finished_mutations_to_keep) + const auto settings = getCOWSettings(); + if (!settings->finished_mutations_to_keep) return; std::vector mutations_to_delete; { std::lock_guard lock(currently_merging_mutex); - if (current_mutations_by_version.size() <= settings.finished_mutations_to_keep) + if (current_mutations_by_version.size() <= settings->finished_mutations_to_keep) return; auto begin_it = current_mutations_by_version.begin(); @@ -819,10 +820,10 @@ void StorageMergeTree::clearOldMutations() end_it = current_mutations_by_version.upper_bound(*min_version); size_t done_count = std::distance(begin_it, end_it); - if (done_count <= settings.finished_mutations_to_keep) + if (done_count <= settings->finished_mutations_to_keep) return; - size_t to_delete_count = done_count - settings.finished_mutations_to_keep; + size_t to_delete_count = done_count - settings->finished_mutations_to_keep; auto it = begin_it; for (size_t i = 0; i < to_delete_count; ++i) diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0df90604f67..556cf7999b8 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -151,7 +151,7 @@ protected: const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. const ASTPtr & ttl_table_ast_, const MergingParams & merging_params_, - const MergeTreeSettings & settings_, + MergeTreeSettingsPtr settings_, bool has_force_restore_data_flag); }; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 0b66e98d0dc..c788c940efc 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -202,7 +202,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const ASTPtr & sample_by_ast_, const ASTPtr & ttl_table_ast_, const MergingParams & merging_params_, - const MergeTreeSettings & settings_, + MergeTreeSettingsPtr settings_, bool has_force_restore_data_flag) : MergeTreeData(database_name_, table_name_, path_ + escapeForFileName(table_name_) + '/', @@ -376,7 +376,7 @@ void StorageReplicatedMergeTree::createTableIfNotExists() } -/** Verify that list of columns and table settings match those specified in ZK (/ metadata). +/** Verify that list of columns and table storage_settings match those specified in ZK (/ metadata). * If not, throw an exception. 
*/ void StorageReplicatedMergeTree::checkTableStructure(bool skip_sanity_checks, bool allow_alter) @@ -633,7 +633,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) for (const auto & part : parts) total_rows_on_filesystem += part->rows_count; - bool insane = unexpected_parts_rows > total_rows_on_filesystem * settings.replicated_max_ratio_of_wrong_parts; + const auto storage_settings = getCOWSettings(); + bool insane = unexpected_parts_rows > total_rows_on_filesystem * storage_settings->replicated_max_ratio_of_wrong_parts; if (insane && !skip_sanity_checks) { @@ -776,12 +777,13 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: if (!has_been_already_added) { + const auto storage_settings = getCOWSettings(); String part_path = replica_path + "/parts/" + part_name; ops.emplace_back(zkutil::makeCheckRequest( zookeeper_path + "/columns", expected_columns_version)); - if (settings.use_minimalistic_part_header_in_zookeeper) + if (storage_settings->use_minimalistic_part_header_in_zookeeper) { ops.emplace_back(zkutil::makeCreateRequest( part_path, local_part_header.toString(), zkutil::CreateMode::Persistent)); @@ -858,7 +860,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) const { return MinimalisticDataPartChecksums::getSerializedString(checksums, - static_cast(settings.use_minimalistic_checksums_in_zookeeper)); + getCOWSettings()->use_minimalistic_checksums_in_zookeeper); } @@ -1029,13 +1031,14 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) parts.push_back(part); } + const auto storage_settings = getCOWSettings(); if (!have_all_parts) { /// If you do not have all the necessary parts, try to take some already merged part from someone. LOG_DEBUG(log, "Don't have all parts for merge " << entry.new_part_name << "; will try to fetch it instead"); return false; } - else if (entry.create_time + settings.prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr)) + else if (entry.create_time + storage_settings->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr)) { /// If entry is old enough, and have enough size, and part are exists in any replica, /// then prefer fetching of merged part from replica. @@ -1044,7 +1047,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) for (const auto & part : parts) sum_parts_bytes_on_disk += part->bytes_on_disk; - if (sum_parts_bytes_on_disk >= settings.prefer_fetch_merged_part_size_threshold) + if (sum_parts_bytes_on_disk >= storage_settings->prefer_fetch_merged_part_size_threshold) { String replica = findReplicaHavingPart(entry.new_part_name, true); /// NOTE excessive ZK requests for same data later, may remove. if (!replica.empty()) @@ -1154,6 +1157,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedMergeTree::LogEntry & entry) { const String & source_part_name = entry.source_parts.at(0); + const auto storage_settings = getCOWSettings(); LOG_TRACE(log, "Executing log entry to mutate part " << source_part_name << " to " << entry.new_part_name); DataPartPtr source_part = getActiveContainingPart(source_part_name); @@ -1173,8 +1177,8 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM /// TODO - some better heuristic? 
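/// ------------------------------------------------------------------------------
/// Illustrative condensation of the shared heuristic in the tryExecuteMerge() and
/// tryExecutePartMutation() hunks: if the log entry is old enough and the expected
/// result is large enough, prefer fetching the finished part from another replica
/// over redoing the work locally. Both thresholds now come from the settings snapshot.

bool prefer_fetch_over_local_work =
    entry.create_time + storage_settings->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr)
    && estimated_space_for_result >= storage_settings->prefer_fetch_merged_part_size_threshold;
/// when true, findReplicaHavingPart() is consulted and the part is fetched if available
/// ------------------------------------------------------------------------------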
size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}); - if (entry.create_time + settings.prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr) - && estimated_space_for_result >= settings.prefer_fetch_merged_part_size_threshold) + if (entry.create_time + storage_settings->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr) + && estimated_space_for_result >= storage_settings->prefer_fetch_merged_part_size_threshold) { /// If entry is old enough, and have enough size, and some replica has the desired part, /// then prefer fetching from replica. @@ -1268,20 +1272,21 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry) { String replica = findReplicaHavingCoveringPart(entry, true); + const auto storage_settings = getCOWSettings(); static std::atomic_uint total_fetches {0}; - if (settings.replicated_max_parallel_fetches && total_fetches >= settings.replicated_max_parallel_fetches) + if (storage_settings->replicated_max_parallel_fetches && total_fetches >= storage_settings->replicated_max_parallel_fetches) { - throw Exception("Too many total fetches from replicas, maximum: " + settings.replicated_max_parallel_fetches.toString(), + throw Exception("Too many total fetches from replicas, maximum: " + storage_settings->replicated_max_parallel_fetches.toString(), ErrorCodes::TOO_MANY_FETCHES); } ++total_fetches; SCOPE_EXIT({--total_fetches;}); - if (settings.replicated_max_parallel_fetches_for_table && current_table_fetches >= settings.replicated_max_parallel_fetches_for_table) + if (storage_settings->replicated_max_parallel_fetches_for_table && current_table_fetches >= storage_settings->replicated_max_parallel_fetches_for_table) { - throw Exception("Too many fetches from replicas for table, maximum: " + settings.replicated_max_parallel_fetches_for_table.toString(), + throw Exception("Too many fetches from replicas for table, maximum: " + storage_settings->replicated_max_parallel_fetches_for_table.toString(), ErrorCodes::TOO_MANY_FETCHES); } @@ -2162,6 +2167,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() if (!is_leader) return; + const auto storage_settings = getCOWSettings(); const bool deduplicate = false; /// TODO: read deduplicate option from table config const bool force_ttl = false; @@ -2181,16 +2187,16 @@ void StorageReplicatedMergeTree::mergeSelectingTask() /// Otherwise merge queue could be filled with only large merges, /// and in the same time, many small parts could be created and won't be merged. 
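/// ------------------------------------------------------------------------------
/// Sketch of the throttling idiom in the executeFetch() hunk above (all identifiers
/// appear in the patch): concurrent fetches are bounded by a process-wide counter,
/// and the increment is paired with SCOPE_EXIT so the slot is released on every
/// exit path, including exceptions. A per-table counter is checked the same way.

static std::atomic_uint total_fetches {0};
if (storage_settings->replicated_max_parallel_fetches
    && total_fetches >= storage_settings->replicated_max_parallel_fetches)
    throw Exception("Too many total fetches from replicas, maximum: "
        + storage_settings->replicated_max_parallel_fetches.toString(), ErrorCodes::TOO_MANY_FETCHES);

++total_fetches;
SCOPE_EXIT({ --total_fetches; });
/// ------------------------------------------------------------------------------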
size_t merges_and_mutations_queued = queue.countMergesAndPartMutations(); - if (merges_and_mutations_queued >= settings.max_replicated_merges_in_queue) + if (merges_and_mutations_queued >= storage_settings->max_replicated_merges_in_queue) { LOG_TRACE(log, "Number of queued merges and part mutations (" << merges_and_mutations_queued << ") is greater than max_replicated_merges_in_queue (" - << settings.max_replicated_merges_in_queue << "), so won't select new parts to merge or mutate."); + << storage_settings->max_replicated_merges_in_queue << "), so won't select new parts to merge or mutate."); } else { UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( - settings.max_replicated_merges_in_queue, merges_and_mutations_queued); + storage_settings->max_replicated_merges_in_queue, merges_and_mutations_queued); UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); FutureMergedMutatedPart future_merged_part; @@ -2973,10 +2979,11 @@ void StorageReplicatedMergeTree::assertNotReadonly() const BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const Context & context) { + const auto storage_settings = getCOWSettings(); assertNotReadonly(); const Settings & query_settings = context.getSettingsRef(); - bool deduplicate = settings.replicated_deduplication_window != 0 && query_settings.insert_deduplicate; + bool deduplicate = storage_settings->replicated_deduplication_window != 0 && query_settings.insert_deduplicate; return std::make_shared(*this, query_settings.insert_quorum, query_settings.insert_quorum_timeout.totalMilliseconds(), query_settings.max_partitions_per_insert_block, deduplicate); @@ -3010,6 +3017,7 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p }; bool force_ttl = (final && (hasTableTTL() || hasAnyColumnTTL())); + const auto storage_settings = getCOWSettings(); if (!partition && final) { @@ -3042,7 +3050,7 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p if (!partition) { selected = merger_mutator.selectPartsToMerge( - future_merged_part, true, settings.max_bytes_to_merge_at_max_space_in_pool, can_merge, &disable_reason); + future_merged_part, true, storage_settings->max_bytes_to_merge_at_max_space_in_pool, can_merge, &disable_reason); } else { @@ -3092,9 +3100,9 @@ void StorageReplicatedMergeTree::alter( if (params.isSettingsAlter()) { - /// We don't replicate settings ALTER. It's local operation. + /// We don't replicate storage_settings ALTER. It's local operation. /// Also we don't upgrade alter lock to table structure lock. 
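/// ------------------------------------------------------------------------------
/// Illustrative condensation of the settings-only ALTER branch in the hunk below
/// (identifiers are taken from the patch; the early return is implied by the
/// surrounding code): MODIFY SETTING is applied locally to this replica's COW
/// settings and is not written to the replication log.

if (params.isSettingsAlter())
{
    SettingsChanges new_changes;
    params.applyForSettingsOnly(new_changes);   /// collect SETTING name = value pairs from the query
    alterSettings(new_changes, current_database_name, current_table_name, query_context, table_lock_holder);
    return;                                     /// no structural ALTER, nothing to replicate
}
/// ------------------------------------------------------------------------------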
- LOG_DEBUG(log, "ALTER settings only"); + LOG_DEBUG(log, "ALTER storage_settings only"); SettingsChanges new_changes; params.applyForSettingsOnly(new_changes); alterSettings(new_changes, current_database_name, current_table_name, query_context, table_lock_holder); @@ -3920,9 +3928,10 @@ void StorageReplicatedMergeTree::waitForReplicaToProcessLogEntry(const String & void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) { auto zookeeper = tryGetZooKeeper(); + const auto storage_settings = getCOWSettings(); res.is_leader = is_leader; - res.can_become_leader = settings.replicated_can_become_leader; + res.can_become_leader = storage_settings->replicated_can_become_leader; res.is_readonly = is_readonly; res.is_session_expired = !zookeeper || zookeeper->expired(); @@ -4112,13 +4121,14 @@ void StorageReplicatedMergeTree::getReplicaDelays(time_t & out_absolute_delay, t out_absolute_delay = getAbsoluteDelay(); out_relative_delay = 0; + const auto storage_settings = getCOWSettings(); /** Relative delay is the maximum difference of absolute delay from any other replica, * (if this replica lags behind any other live replica, or zero, otherwise). * Calculated only if the absolute delay is large enough. */ - if (out_absolute_delay < static_cast(settings.min_relative_delay_to_yield_leadership)) + if (out_absolute_delay < static_cast(storage_settings->min_relative_delay_to_yield_leadership)) return; auto zookeeper = getZooKeeper(); @@ -4376,7 +4386,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const /// instead. /// /// Mutations of individual parts are in fact pretty similar to merges, e.g. their assignment and execution - /// is governed by the same settings. TODO: support a single "merge-mutation" operation when the data + /// is governed by the same storage_settings. TODO: support a single "merge-mutation" operation when the data /// read from the the source parts is first mutated on the fly to some uniform mutation version and then /// merged to a resulting part. 
/// @@ -4939,6 +4949,7 @@ void StorageReplicatedMergeTree::getCommitPartOps( const String & block_id_path) const { const String & part_name = part->name; + const auto storage_settings = getCOWSettings(); if (!block_id_path.empty()) { @@ -4956,7 +4967,7 @@ void StorageReplicatedMergeTree::getCommitPartOps( zookeeper_path + "/columns", columns_version)); - if (settings.use_minimalistic_part_header_in_zookeeper) + if (storage_settings->use_minimalistic_part_header_in_zookeeper) { ops.emplace_back(zkutil::makeCreateRequest( replica_path + "/parts/" + part->name, @@ -4985,11 +4996,12 @@ void StorageReplicatedMergeTree::updatePartHeaderInZooKeeperAndCommit( AlterDataPartTransaction & transaction) { String part_path = replica_path + "/parts/" + transaction.getPartName(); + const auto storage_settings = getCOWSettings(); bool need_delete_columns_and_checksums_nodes = false; try { - if (settings.use_minimalistic_part_header_in_zookeeper) + if (storage_settings->use_minimalistic_part_header_in_zookeeper) { auto part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( transaction.getNewColumns(), transaction.getNewChecksums()); @@ -5169,8 +5181,9 @@ CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, const C bool StorageReplicatedMergeTree::canUseAdaptiveGranularity() const { - return settings.index_granularity_bytes != 0 && - (settings.enable_mixed_granularity_parts || + const auto storage_settings = getCOWSettings(); + return storage_settings->index_granularity_bytes != 0 && + (storage_settings->enable_mixed_granularity_parts || (!has_non_adaptive_index_granularity_parts && !other_replicas_fixed_granularity)); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 7f632ab4cb4..a32a9f19473 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -534,7 +534,7 @@ protected: const ASTPtr & sample_by_ast_, const ASTPtr & table_ttl_ast_, const MergingParams & merging_params_, - const MergeTreeSettings & settings_, + MergeTreeSettingsPtr settings_, bool has_force_restore_data_flag); }; From 9e78781378fba7cae91239ffd02cdfcbd5314e57 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 13:39:06 +0300 Subject: [PATCH 19/59] Fix tests --- dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql | 4 ++-- .../0_stateless/00980_zookeeper_merge_tree_alter_settings.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql index 866cf960710..973d9b95d20 100644 --- a/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql +++ b/dbms/tests/queries/0_stateless/00980_merge_alter_settings.sql @@ -5,7 +5,7 @@ CREATE TABLE log_for_alter ( Data String ) ENGINE = Log(); -ALTER TABLE log_for_alter MODIFY SETTING aaa=123; -- { serverError 468 } +ALTER TABLE log_for_alter MODIFY SETTING aaa=123; -- { serverError 469 } DROP TABLE IF EXISTS log_for_alter; @@ -16,7 +16,7 @@ CREATE TABLE table_for_alter ( Data String ) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity=4096; -ALTER TABLE table_for_alter MODIFY SETTING index_granularity=555; -- { serverError 469 } +ALTER TABLE table_for_alter MODIFY SETTING index_granularity=555; -- { serverError 470 } SHOW CREATE TABLE table_for_alter; diff --git a/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql 
b/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql index 9c7a7c33329..24c1135c247 100644 --- a/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql +++ b/dbms/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql @@ -13,7 +13,7 @@ CREATE TABLE replicated_table_for_alter2 ( SHOW CREATE TABLE replicated_table_for_alter1; -ALTER TABLE replicated_table_for_alter1 MODIFY SETTING index_granularity = 4096; -- { serverError 469 } +ALTER TABLE replicated_table_for_alter1 MODIFY SETTING index_granularity = 4096; -- { serverError 470 } SHOW CREATE TABLE replicated_table_for_alter1; From 1b68d79c59e7037f52263a3d1ae1331037cb16f0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 13:56:58 +0300 Subject: [PATCH 20/59] Revert immutable settings --- dbms/src/Core/SettingsCommon.cpp | 129 +++++------------------------- dbms/src/Core/SettingsCommon.h | 133 +++++++------------------------ 2 files changed, 49 insertions(+), 213 deletions(-) diff --git a/dbms/src/Core/SettingsCommon.cpp b/dbms/src/Core/SettingsCommon.cpp index 2d3ed4f6e14..988a4b2d736 100644 --- a/dbms/src/Core/SettingsCommon.cpp +++ b/dbms/src/Core/SettingsCommon.cpp @@ -34,19 +34,20 @@ namespace ErrorCodes template String SettingNumber::toString() const { - return DB::toString(getValue()); + return DB::toString(value); } template Field SettingNumber::toField() const { - return getValue(); + return value; } template void SettingNumber::set(Type x) { - data.store(Data{x, true}, std::memory_order_relaxed); + value = x; + changed = true; } template @@ -58,14 +59,6 @@ void SettingNumber::set(const Field & x) set(applyVisitor(FieldVisitorConvertToNumber(), x)); } - -template -SettingNumber & SettingNumber::operator= (const SettingNumber & o) -{ - data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); - return *this; -} - template void SettingNumber::set(const String & x) { @@ -100,9 +93,9 @@ template void SettingNumber::serialize(WriteBuffer & buf) const { if constexpr (std::is_integral_v && std::is_unsigned_v) - writeVarUInt(static_cast(getValue()), buf); + writeVarUInt(static_cast(value), buf); else if constexpr (std::is_integral_v && std::is_signed_v) - writeVarInt(static_cast(getValue()), buf); + writeVarInt(static_cast(value), buf); else { static_assert(std::is_floating_point_v); @@ -140,28 +133,22 @@ template struct SettingNumber; template struct SettingNumber; -SettingMaxThreads & SettingMaxThreads::operator= (const SettingMaxThreads & o) -{ - data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); - return *this; -} - String SettingMaxThreads::toString() const { - auto d = data.load(std::memory_order_relaxed); /// Instead of the `auto` value, we output the actual value to make it easier to see. - return d.is_auto ? ("auto(" + DB::toString(d.value) + ")") : DB::toString(d.value); + return is_auto ? ("auto(" + DB::toString(value) + ")") : DB::toString(value); } Field SettingMaxThreads::toField() const { - auto d = data.load(std::memory_order_relaxed); - return d.is_auto ? 0 : d.value; + return is_auto ? 0 : value; } void SettingMaxThreads::set(UInt64 x) { - data.store({x ? x : getAutoValue(), x == 0, true}); + value = x ? 
x : getAutoValue(); + is_auto = x == 0; + changed = true; } void SettingMaxThreads::set(const Field & x) @@ -182,8 +169,7 @@ void SettingMaxThreads::set(const String & x) void SettingMaxThreads::serialize(WriteBuffer & buf) const { - auto d = data.load(std::memory_order_relaxed); - writeVarUInt(d.is_auto ? 0 : d.value, buf); + writeVarUInt(is_auto ? 0 : value, buf); } void SettingMaxThreads::deserialize(ReadBuffer & buf) @@ -195,7 +181,8 @@ void SettingMaxThreads::deserialize(ReadBuffer & buf) void SettingMaxThreads::setAuto() { - data.store({getAutoValue(), true, isChanged()}); + value = getAutoValue(); + is_auto = true; } UInt64 SettingMaxThreads::getAutoValue() const @@ -204,54 +191,22 @@ UInt64 SettingMaxThreads::getAutoValue() const return res; } -void SettingMaxThreads::setChanged(bool changed) -{ - auto d = data.load(std::memory_order_relaxed); - data.store({d.value, d.is_auto, changed}); -} - - -template -SettingTimespan & SettingTimespan::operator= (const SettingTimespan & o) -{ - std::shared_lock lock_o(o.mutex); - value = o.value; - changed = o.changed; - return *this; -} - -template -SettingTimespan::SettingTimespan(const SettingTimespan & o) -{ - std::shared_lock lock_o(o.mutex); - value = o.value; - changed = o.changed; -} - - -template -void SettingTimespan::setChanged(bool c) -{ - std::unique_lock lock(mutex); - changed = c; -} template String SettingTimespan::toString() const { - return DB::toString(getValue().totalMicroseconds() / microseconds_per_io_unit); + return DB::toString(value.totalMicroseconds() / microseconds_per_io_unit); } template Field SettingTimespan::toField() const { - return getValue().totalMicroseconds() / microseconds_per_io_unit; + return value.totalMicroseconds() / microseconds_per_io_unit; } template void SettingTimespan::set(const Poco::Timespan & x) { - std::unique_lock lock(mutex); value = x; changed = true; } @@ -280,7 +235,7 @@ void SettingTimespan::set(const String & x) template void SettingTimespan::serialize(WriteBuffer & buf) const { - writeVarUInt(getValue().totalMicroseconds() / microseconds_per_io_unit, buf); + writeVarUInt(value.totalMicroseconds() / microseconds_per_io_unit, buf); } template @@ -294,47 +249,23 @@ void SettingTimespan::deserialize(ReadBuffer & buf) template struct SettingTimespan; template struct SettingTimespan; -SettingString & SettingString::operator= (const SettingString & o) -{ - std::shared_lock lock_o(o.mutex); - value = o.value; - changed = o.changed; - return *this; -} - -SettingString::SettingString(const SettingString & o) -{ - std::shared_lock lock(o.mutex); - value = o.value; - changed = o.changed; -} - String SettingString::toString() const { - std::shared_lock lock(mutex); return value; } Field SettingString::toField() const { - std::shared_lock lock(mutex); return value; } void SettingString::set(const String & x) { - std::unique_lock lock(mutex); value = x; changed = true; } -void SettingString::setChanged(bool c) -{ - std::unique_lock lock(mutex); - changed = c; -} - void SettingString::set(const Field & x) { set(safeGet(x)); @@ -352,15 +283,10 @@ void SettingString::deserialize(ReadBuffer & buf) set(s); } -SettingChar & SettingChar::operator= (const SettingChar & o) -{ - data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); - return *this; -} String SettingChar::toString() const { - return String(1, getValue()); + return String(1, value); } Field SettingChar::toField() const @@ -370,7 +296,8 @@ Field SettingChar::toField() const void SettingChar::set(char x) { - data.store({x, 
true}); + value = x; + changed = true; } void SettingChar::set(const String & x) @@ -400,19 +327,6 @@ void SettingChar::deserialize(ReadBuffer & buf) } -template -SettingEnum & SettingEnum::operator= (const SettingEnum & o) -{ - data.store(o.data.load(std::memory_order_relaxed), std::memory_order_relaxed); - return *this; -} - -template -void SettingEnum::set(EnumType x) -{ - data.store({x, true}, std::memory_order_relaxed); -} - template void SettingEnum::serialize(WriteBuffer & buf) const { @@ -428,7 +342,6 @@ void SettingEnum::deserialize(ReadBuffer & buf) } - #define IMPLEMENT_SETTING_ENUM(ENUM_NAME, LIST_OF_NAMES_MACRO, ERROR_CODE_FOR_UNEXPECTED_NAME) \ IMPLEMENT_SETTING_ENUM_WITH_TAG(ENUM_NAME, void, LIST_OF_NAMES_MACRO, ERROR_CODE_FOR_UNEXPECTED_NAME) @@ -438,7 +351,7 @@ void SettingEnum::deserialize(ReadBuffer & buf) { \ using EnumType = ENUM_NAME; \ using UnderlyingType = std::underlying_type::type; \ - switch (static_cast(getValue())) \ + switch (static_cast(value)) \ { \ LIST_OF_NAMES_MACRO(IMPLEMENT_SETTING_ENUM_TO_STRING_HELPER_) \ } \ diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 83d90c28f53..d4607e70904 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -8,8 +8,6 @@ #include #include #include -#include -#include namespace DB @@ -34,25 +32,13 @@ namespace ErrorCodes template struct SettingNumber { - struct Data - { - Type value; - bool changed; - }; + Type value; + bool changed = false; - std::atomic data; - - SettingNumber(Type x = 0) : data{{x, false}} {} - SettingNumber(const SettingNumber & o) : data{o.data.load(std::memory_order_relaxed)} {} - - bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } - void setChanged(bool changed) { data.store({getValue(), changed}, std::memory_order_relaxed); } - - operator Type() const { return getValue(); } - Type getValue() const { return data.load(std::memory_order_relaxed).value; } + SettingNumber(Type x = 0) : value(x) {} + operator Type() const { return value; } SettingNumber & operator= (Type x) { set(x); return *this; } - SettingNumber & operator= (const SettingNumber & o); /// Serialize to a test string. String toString() const; @@ -87,26 +73,14 @@ using SettingBool = SettingNumber; */ struct SettingMaxThreads { - struct Data - { - UInt64 value; - bool is_auto; - bool changed; - }; + UInt64 value; + bool is_auto; + bool changed = false; - std::atomic data; - - SettingMaxThreads(UInt64 x = 0) : data{{x ? x : getAutoValue(), x == 0, false}} {} - SettingMaxThreads(const SettingMaxThreads & o) : data{o.data.load(std::memory_order_relaxed)} {} - - bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } - void setChanged(bool changed); - - operator UInt64() const { return getValue(); } - UInt64 getValue() const { return data.load(std::memory_order_relaxed).value; } + SettingMaxThreads(UInt64 x = 0) : value(x ? 
x : getAutoValue()), is_auto(x == 0) {} + operator UInt64() const { return value; } SettingMaxThreads & operator= (UInt64 x) { set(x); return *this; } - SettingMaxThreads & operator= (const SettingMaxThreads & o); String toString() const; Field toField() const; @@ -118,7 +92,6 @@ struct SettingMaxThreads void serialize(WriteBuffer & buf) const; void deserialize(ReadBuffer & buf); - bool isAuto() const { return data.load(std::memory_order_relaxed).is_auto; } void setAuto(); UInt64 getAutoValue() const; }; @@ -129,37 +102,16 @@ enum class SettingTimespanIO { MILLISECOND, SECOND }; template struct SettingTimespan { - mutable std::shared_mutex mutex; Poco::Timespan value; bool changed = false; SettingTimespan(UInt64 x = 0) : value(x * microseconds_per_io_unit) {} - SettingTimespan(const SettingTimespan & o); - operator Poco::Timespan() const { return getValue(); } - Poco::Timespan getValue() const { std::shared_lock lock(mutex); return value; } + operator Poco::Timespan() const { return value; } SettingTimespan & operator= (const Poco::Timespan & x) { set(x); return *this; } - SettingTimespan & operator= (const SettingTimespan & o); - Poco::Timespan::TimeDiff totalSeconds() const - { - std::shared_lock lock(mutex); - return value.totalSeconds(); - } - - Poco::Timespan::TimeDiff totalMilliseconds() const - { - std::shared_lock lock(mutex); - return value.totalMilliseconds(); - } - - bool isChanged() const - { - std::shared_lock lock(mutex); - return changed; - } - - void setChanged(bool changed); + Poco::Timespan::TimeDiff totalSeconds() const { return value.totalSeconds(); } + Poco::Timespan::TimeDiff totalMilliseconds() const { return value.totalMilliseconds(); } String toString() const; Field toField() const; @@ -182,19 +134,13 @@ using SettingMilliseconds = SettingTimespan; struct SettingString { - mutable std::shared_mutex mutex; String value; bool changed = false; SettingString(const String & x = String{}) : value(x) {} - SettingString(const SettingString & o); - operator String() const { return getValue(); } - String getValue() const { std::shared_lock lock(mutex); return value; } + operator String() const { return value; } SettingString & operator= (const String & x) { set(x); return *this; } - SettingString & operator= (const SettingString & o); - bool isChanged() const { std::shared_lock lock(mutex); return changed; } - void setChanged(bool changed); String toString() const; Field toField() const; @@ -210,25 +156,13 @@ struct SettingString struct SettingChar { public: - struct Data - { - char value; - bool changed; - }; + char value; + bool changed = false; - std::atomic data; - - SettingChar(char x = '\0') : data({x, false}) {} - SettingChar(const SettingChar & o) : data{o.data.load(std::memory_order_relaxed)} {} - - operator char() const { return getValue(); } - char getValue() const { return data.load(std::memory_order_relaxed).value; } + SettingChar(char x = '\0') : value(x) {} + operator char() const { return value; } SettingChar & operator= (char x) { set(x); return *this; } - SettingChar & operator= (const SettingChar & o); - - bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } - void setChanged(bool changed) { data.store({getValue(), changed}, std::memory_order_relaxed);} String toString() const; Field toField() const; @@ -246,30 +180,18 @@ public: template struct SettingEnum { - struct Data - { - EnumType value; - bool changed; - }; + EnumType value; + bool changed = false; - std::atomic data; - - SettingEnum(EnumType x) : data({x, false}) {} - 
SettingEnum(const SettingEnum & o) : data{o.data.load(std::memory_order_relaxed)} {} - - operator EnumType() const { return getValue(); } - EnumType getValue() const { return data.load(std::memory_order_relaxed).value; } + SettingEnum(EnumType x) : value(x) {} + operator EnumType() const { return value; } SettingEnum & operator= (EnumType x) { set(x); return *this; } - SettingEnum & operator= (const SettingEnum & o); - - bool isChanged() const { return data.load(std::memory_order_relaxed).changed; } - void setChanged(bool changed) { data.store({getValue(), changed}, std::memory_order_relaxed);} String toString() const; Field toField() const { return toString(); } - void set(EnumType x); + void set(EnumType x) { value = x; changed = true; } void set(const Field & x) { set(safeGet(x)); } void set(const String & x); @@ -386,7 +308,6 @@ private: Derived & castToDerived() { return *static_cast(this); } const Derived & castToDerived() const { return *static_cast(this); } - using IsChangedFunction = bool (*)(const Derived &); using GetStringFunction = String (*)(const Derived &); using GetFieldFunction = Field (*)(const Derived &); using SetStringFunction = void (*)(Derived &, const String &); @@ -397,7 +318,7 @@ private: struct MemberInfo { - IsChangedFunction is_changed; + size_t offset_of_changed; StringRef name; StringRef description; /// Can be updated after first load for config/definition. @@ -412,7 +333,7 @@ private: DeserializeFunction deserialize; CastValueWithoutApplyingFunction cast_value_without_applying; - bool isChanged(const Derived & collection) const { return is_changed(collection); } + bool isChanged(const Derived & collection) const { return *reinterpret_cast(reinterpret_cast(&collection) + offset_of_changed); } }; class MemberInfos @@ -772,7 +693,8 @@ public: #define IMPLEMENT_SETTINGS_COLLECTION_ADD_MUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ - add({[](const Derived & d) { return d.NAME.isChanged(); }, \ + static_assert(std::is_same_v().NAME.changed), bool>); \ + add({offsetof(Derived, NAME.changed), \ StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), true, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ @@ -780,7 +702,8 @@ public: &Functions::NAME##_castValueWithoutApplying }); #define IMPLEMENT_SETTINGS_COLLECTION_ADD_IMMUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ - add({[](const Derived & d) { return d.NAME.isChanged(); }, \ + static_assert(std::is_same_v().NAME.changed), bool>); \ + add({offsetof(Derived, NAME.changed), \ StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), false, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ From 7b1039568ce987f02329e347251e779070d67380 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 14:24:18 +0300 Subject: [PATCH 21/59] Revert old changed --- dbms/programs/client/Client.cpp | 4 ++-- dbms/programs/copier/ClusterCopier.cpp | 2 +- dbms/src/Core/SettingsCommon.h | 13 ++++++------- dbms/src/Interpreters/ClusterProxy/executeQuery.cpp | 6 +++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 4 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeSettings.cpp | 2 +- 8 files changed, 18 insertions(+), 19 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp 
index f509ebf8c1f..feffd7e6cfe 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -225,7 +225,7 @@ private: for (auto && setting : context.getSettingsRef()) { const String & name = setting.getName().toString(); - if (config().has(name) && !setting.isChanged()) + if (config().has(name) && setting.isChanged()) setting.setValue(config().getString(name)); } @@ -323,7 +323,7 @@ private: insert_format = "Values"; /// Setting value from cmd arg overrides one from config - if (context.getSettingsRef().max_insert_block_size.isChanged()) + if (context.getSettingsRef().max_insert_block_size.changed) insert_format_max_block_size = context.getSettingsRef().max_insert_block_size; else insert_format_max_block_size = config().getInt("insert_format_max_block_size", context.getSettingsRef().max_insert_block_size); diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 5ced5677569..435d06da854 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -708,7 +708,7 @@ void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & c auto set_default_value = [] (auto && setting, auto && default_value) { - setting = setting.isChanged() ? setting.getValue() : default_value; + setting = setting.changed ? setting.value : default_value; }; /// Override important settings diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index d4607e70904..1410dc2611f 100644 --- a/dbms/src/Core/SettingsCommon.h +++ b/dbms/src/Core/SettingsCommon.h @@ -308,6 +308,7 @@ private: Derived & castToDerived() { return *static_cast(this); } const Derived & castToDerived() const { return *static_cast(this); } + using IsChangedFunction = bool (*)(const Derived &); using GetStringFunction = String (*)(const Derived &); using GetFieldFunction = Field (*)(const Derived &); using SetStringFunction = void (*)(Derived &, const String &); @@ -318,11 +319,11 @@ private: struct MemberInfo { - size_t offset_of_changed; + IsChangedFunction is_changed; StringRef name; StringRef description; /// Can be updated after first load for config/definition. - /// Non updatable settings can be isChanged, + /// Non updatable settings can be `changed`, /// if they were overwritten in config/definition. 
bool updateable; GetStringFunction get_string; @@ -333,7 +334,7 @@ private: DeserializeFunction deserialize; CastValueWithoutApplyingFunction cast_value_without_applying; - bool isChanged(const Derived & collection) const { return *reinterpret_cast(reinterpret_cast(&collection) + offset_of_changed); } + bool isChanged(const Derived & collection) const { return is_changed(collection); } }; class MemberInfos @@ -693,8 +694,7 @@ public: #define IMPLEMENT_SETTINGS_COLLECTION_ADD_MUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ - static_assert(std::is_same_v().NAME.changed), bool>); \ - add({offsetof(Derived, NAME.changed), \ + add({[](const Derived & d) { return d.NAME.changed; }, \ StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), true, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ @@ -702,8 +702,7 @@ public: &Functions::NAME##_castValueWithoutApplying }); #define IMPLEMENT_SETTINGS_COLLECTION_ADD_IMMUTABLE_MEMBER_INFO_HELPER_(TYPE, NAME, DEFAULT, DESCRIPTION) \ - static_assert(std::is_same_v().NAME.changed), bool>); \ - add({offsetof(Derived, NAME.changed), \ + add({[](const Derived & d) { return d.NAME.changed; }, \ StringRef(#NAME, strlen(#NAME)), StringRef(#DESCRIPTION, strlen(#DESCRIPTION)), false, \ &Functions::NAME##_getString, &Functions::NAME##_getField, \ &Functions::NAME##_setString, &Functions::NAME##_setField, \ diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp index e4e52eab2b7..25d5a6eb0d4 100644 --- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -26,9 +26,9 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.max_memory_usage_for_all_queries = 0; /// Set as unchanged to avoid sending to remote server. - new_settings.max_concurrent_queries_for_user.setChanged(false); - new_settings.max_memory_usage_for_user.setChanged(false); - new_settings.max_memory_usage_for_all_queries.setChanged(false); + new_settings.max_concurrent_queries_for_user.changed = false; + new_settings.max_memory_usage_for_user.changed = false; + new_settings.max_memory_usage_for_all_queries = false; Context new_context(context); new_context.setSettings(new_settings); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index dfd0ec63643..282b19991b1 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -788,7 +788,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( InJoinSubqueriesPreprocessor(context).visit(query); /// Optimizes logical expressions. 
- LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.getValue()).perform(); + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); } /// Creates a dictionary `aliases`: alias -> ASTPtr diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index afd26fde113..f5a9c2ac4b6 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -440,7 +440,7 @@ void registerStorageKafka(StorageFactory & factory) #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME) \ /* One of the four required arguments is not specified */ \ if (args_count < ARG_NUM && ARG_NUM <= 4 && \ - !kafka_settings.PAR_NAME.isChanged()) \ + !kafka_settings.PAR_NAME.changed) \ { \ throw Exception( \ "Required parameter '" #PAR_NAME "' " \ @@ -449,7 +449,7 @@ void registerStorageKafka(StorageFactory & factory) } \ /* The same argument is given in two places */ \ if (has_settings && \ - kafka_settings.PAR_NAME.isChanged() && \ + kafka_settings.PAR_NAME.changed && \ args_count >= ARG_NUM) \ { \ throw Exception( \ diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 314967b485d..15a19bbc924 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -400,8 +400,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( if (relative_sample_size == RelativeSize(0)) relative_sample_size = 1; - relative_sample_size /= settings.parallel_replicas_count.getValue(); - relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.getValue()); + relative_sample_size /= settings.parallel_replicas_count.value; + relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.value); } if (relative_sample_offset >= RelativeSize(1)) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp index 7c3c7ef02cc..5bc44eee842 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -69,7 +69,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) if (std::find_if(changes.begin(), changes.end(), \ [](const SettingChange & c) { return c.name == #NAME; }) \ == changes.end()) \ - changes.push_back(SettingChange{#NAME, NAME.getValue()}); + changes.push_back(SettingChange{#NAME, NAME.value}); APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(ADD_IF_ABSENT) #undef ADD_IF_ABSENT From a4d6e8d5b2f9f53487599ad510f471d4e5589492 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 14:28:28 +0300 Subject: [PATCH 22/59] Revert latomic --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ac0224ac56b..d369dca7e78 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -350,7 +350,7 @@ if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_L endif () # Add Libc. GLIBC is actually a collection of interdependent libraries. - set (DEFAULT_LIBS "${DEFAULT_LIBS} -lrt -ldl -lpthread -lm -lc -latomic") + set (DEFAULT_LIBS "${DEFAULT_LIBS} -lrt -ldl -lpthread -lm -lc") # Note: we'd rather use Musl libc library, but it's little bit more difficult to use. 
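The two patches above undo the short-lived thread-safe settings experiment: the per-setting std::shared_mutex / std::atomic wrappers and the offsetof-based change tracking go away, each setting is again a plain value plus a changed flag, the collection's MemberInfo queries that flag through a per-setting function pointer, and the now-unneeded -latomic link dependency is dropped. A minimal sketch of the resulting layout, using simplified illustrative names rather than the real SettingsCommon.h declarations:

#include <cstdint>
#include <string>
#include <vector>

using UInt64 = uint64_t;

// Each concrete setting is just a value plus a dirty flag; set() raises the flag.
struct SettingUInt64Sketch
{
    UInt64 value = 0;
    bool changed = false;

    void set(UInt64 x) { value = x; changed = true; }
    SettingUInt64Sketch & operator=(UInt64 x) { set(x); return *this; }
    operator UInt64() const { return value; }
};

// The collection registers one descriptor per setting; change detection goes through
// a captureless lambda stored as a function pointer, as in the macros above.
struct SettingsSketch
{
    SettingUInt64Sketch max_threads;

    struct MemberInfoSketch
    {
        std::string name;
        bool (*is_changed)(const SettingsSketch &);
    };

    static std::vector<MemberInfoSketch> members()
    {
        return {{"max_threads", [](const SettingsSketch & s) { return s.max_threads.changed; }}};
    }
};

int main()
{
    SettingsSketch s;
    s.max_threads = 8;   // operator= calls set(), which raises the flag
    return SettingsSketch::members()[0].is_changed(s) ? 0 : 1;   // exits 0: reported as changed
}

Only settings whose flag is raised are serialized and forwarded to remote servers, which is why removeUserRestrictionsFromSettings above resets the flag after zeroing the per-user limits. (Note that the last of the three resets in that hunk lost its .changed member, so it assigns false to the setting itself; that goes through set() and raises the flag instead of clearing it.)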
From bf5ec73582d413d9bb3dca1b32877bcad86eae86 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 14:40:45 +0300 Subject: [PATCH 23/59] Fix clang build --- dbms/src/Interpreters/Context.cpp | 7 ++++--- dbms/src/Storages/MergeTree/MergeTreeSettings.h | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 0b0cf657987..17b031dbd64 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -148,7 +148,7 @@ struct ContextShared std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. mutable std::unique_ptr compression_codec_selector; - std::optional merge_tree_settings; /// Settings of MergeTree* engines. + MergeTreeSettingsPtr merge_tree_settings; /// Settings of MergeTree* engines. size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default) size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default) String format_schema_path; /// Path to a directory that contains schema files used by input formats. @@ -1759,8 +1759,9 @@ const MergeTreeSettings & Context::getMergeTreeSettings() const if (!shared->merge_tree_settings) { auto & config = getConfigRef(); - shared->merge_tree_settings.emplace(); - shared->merge_tree_settings->loadFromConfig("merge_tree", config); + MutableMergeTreeSettingsPtr settings_ptr = MergeTreeSettings::create(); + settings_ptr->loadFromConfig("merge_tree", config); + shared->merge_tree_settings = std::move(settings_ptr); } return *shared->merge_tree_settings; diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index dff7cc2e523..c098aac2526 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -101,7 +101,9 @@ struct MergeTreeSettings : public SettingsCollection, public void loadFromQuery(ASTStorage & storage_def); MutablePtr clone() const; - ~MergeTreeSettings() {} +private: + MergeTreeSettings() = default; + MergeTreeSettings(const MergeTreeSettings & o) = default; }; using MergeTreeSettingsPtr = MergeTreeSettings::Ptr; From 5b85373c566293aa52cd6720525f35021d57b619 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 14:58:59 +0300 Subject: [PATCH 24/59] Fix stupid bug --- dbms/programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index feffd7e6cfe..cf72d7a87c3 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -225,7 +225,7 @@ private: for (auto && setting : context.getSettingsRef()) { const String & name = setting.getName().toString(); - if (config().has(name) && setting.isChanged()) + if (config().has(name) && !setting.isChanged()) setting.setValue(config().getString(name)); } From b00903330fe26276049929cb30079129e5d8bf85 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 15:03:42 +0300 Subject: [PATCH 25/59] Remove empty block --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 4d47f60a2fe..97d74d7a037 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1644,12 +1644,11 @@ void MergeTreeData::alterSettings( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - { - MutableMergeTreeSettingsPtr settings = std::move(*guarded_settings.getPtr()).mutate(); - settings->updateFromChanges(new_changes); - IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); - guarded_settings.setPtr(std::move(settings)); - } + /// No additional locking required, because we took table_lock_holder + MutableMergeTreeSettingsPtr settings = std::move(*guarded_settings.getPtr()).mutate(); + settings->updateFromChanges(new_changes); + IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); + guarded_settings.setPtr(std::move(settings)); } bool MergeTreeData::hasSetting(const String & setting_name) const From f988090111cf28fb8a8a300cabda0aa0d5d8ae21 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 15:10:58 +0300 Subject: [PATCH 26/59] Remove redundant move --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 97d74d7a037..aab384552cc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -118,7 +118,7 @@ MergeTreeData::MergeTreeData( full_path(full_path_), broken_part_callback(broken_part_callback_), log_name(database_name + "." + table_name), log(&Logger::get(log_name)), - guarded_settings(std::move(settings_)), + guarded_settings(settings_), data_parts_by_info(data_parts_indexes.get()), data_parts_by_state_and_info(data_parts_indexes.get()) { From 031bfc7bf331ecc24345f9f9b5b8040b2994090e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 17:46:18 +0300 Subject: [PATCH 27/59] Remove reference --- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index b94555477f9..8149ef9ddfe 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -707,7 +707,7 @@ protected: {} const MergeTreeSettingsPtr copyPtr() const { return settings_ptr; } - MergeTreeSettingsPtr & getPtr() { return settings_ptr; } + MergeTreeSettingsPtr getPtr() { return settings_ptr; } void setPtr(MergeTreeSettingsPtr ptr) { settings_ptr = ptr; } }; From 96f62fefcc162ceb1700e2fd91efffbc0fe964e4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 13 Aug 2019 19:34:52 +0300 Subject: [PATCH 28/59] Add mutex for settings --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index aab384552cc..3c14b7b1084 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1644,7 +1644,7 @@ void MergeTreeData::alterSettings( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - /// No additional locking required, because we took table_lock_holder + std::unique_lock lock(settings_mutex); MutableMergeTreeSettingsPtr settings = std::move(*guarded_settings.getPtr()).mutate(); settings->updateFromChanges(new_changes); 
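    /// In short, the update path here is copy-on-write: writers serialize on settings_mutex,
    /// clone the immutable settings object, apply the changes from the ALTER to the clone,
    /// and publish the new pointer, while readers (getCOWSettings() in MergeTreeData.h, below)
    /// hold a shared lock only long enough to copy that pointer. A condensed, illustrative
    /// pairing of the two sides -- simplified from the surrounding code, not a literal quote:
    ///
    ///     // writer (this function): exclusive lock, clone, mutate, publish
    ///     std::unique_lock<std::shared_mutex> lock(settings_mutex);
    ///     auto copy = std::move(*guarded_settings.getPtr()).mutate();
    ///     copy->updateFromChanges(new_changes);
    ///     guarded_settings.setPtr(std::move(copy));
    ///
    ///     // reader: shared lock only while copying the pointer
    ///     std::shared_lock<std::shared_mutex> lock(settings_mutex);
    ///     return guarded_settings.copyPtr();
    ///
    /// Because the published object itself is never modified afterwards, a query that copied
    /// the pointer before an ALTER keeps a consistent settings snapshot until it finishes.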
IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 8149ef9ddfe..e1cfe7b0032 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -662,6 +662,7 @@ public: /// get consistent settings. const MergeTreeSettingsPtr getCOWSettings() const { + std::shared_lock lock(settings_mutex); return guarded_settings.copyPtr(); } @@ -801,6 +802,8 @@ protected: std::mutex grab_old_parts_mutex; /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; + /// Mutex for settings usage + mutable std::shared_mutex settings_mutex; void setPrimaryKeyIndicesAndColumns(const ASTPtr & new_order_by_ast, const ASTPtr & new_primary_key_ast, const ColumnsDescription & new_columns, From 99e85ff7530cca14d881d54321809cc52969243e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 19 Aug 2019 11:25:07 +0300 Subject: [PATCH 29/59] Fix build --- dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8c04dc372c8..8cbb0819d20 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -871,7 +871,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO return streams; /// Let's split ranges to avoid reading much data. - auto split_ranges = [rows_granularity = data.settings.index_granularity, max_block_size](const auto & ranges, int direction) + auto split_ranges = [rows_granularity = data_settings->index_granularity, max_block_size](const auto & ranges, int direction) { MarkRanges new_ranges; const size_t max_marks_in_range = (max_block_size + rows_granularity - 1) / rows_granularity; From 69015f292e3af3824868839a31510b48c84efd9a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 19 Aug 2019 14:18:39 +0300 Subject: [PATCH 30/59] Fix wrong merge --- dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 7c4b3fd1fcb..1d064b9b0c7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1032,7 +1032,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor NameSet files_to_skip = {"checksums.txt", "columns.txt"}; auto mrk_extension = data_settings->index_granularity_bytes ? 
getAdaptiveMrkExtension() : getNonAdaptiveMrkExtension(); - for (const auto & entry : in_header) + for (const auto & entry : updated_header) { IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) { From f2a8c18c1ff98779a21451209d43a326d22b1f53 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 19 Aug 2019 14:18:58 +0300 Subject: [PATCH 31/59] Simplify lock --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 6f6c43facf8..29aafc1aa45 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1645,7 +1645,7 @@ void MergeTreeData::alterSettings( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - std::unique_lock lock(settings_mutex); + std::lock_guard lock(settings_mutex); MutableMergeTreeSettingsPtr settings = std::move(*guarded_settings.getPtr()).mutate(); settings->updateFromChanges(new_changes); IStorage::alterSettings(new_changes, current_database_name, current_table_name, context, table_lock_holder); From 6d78e3be94b56f5e844238af7a688f0125d7e274 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 21 Aug 2019 11:12:39 +0300 Subject: [PATCH 32/59] hasToken function implementation * Function to check if given token is present in a string; * Special case for hasToken to 'tokenbf_v1' index; * Test cases for hasToken() * Test case for hasToken() + 'tokenbf_v1' integration --- dbms/src/Common/StringSearcher.h | 79 +++++++++- dbms/src/Common/Volnitsky.h | 14 +- dbms/src/Functions/FunctionsStringSearch.cpp | 76 ++++++++++ .../MergeTree/MergeTreeIndexFullText.cpp | 13 ++ .../queries/0_stateless/00990_hasToken.python | 124 ++++++++++++++++ .../0_stateless/00990_hasToken.reference | 139 ++++++++++++++++++ .../queries/0_stateless/00990_hasToken.sh | 8 + .../00990_hasToken_and_tokenbf.reference | 3 + .../00990_hasToken_and_tokenbf.sql | 33 +++++ 9 files changed, 481 insertions(+), 8 deletions(-) create mode 100755 dbms/tests/queries/0_stateless/00990_hasToken.python create mode 100644 dbms/tests/queries/0_stateless/00990_hasToken.reference create mode 100755 dbms/tests/queries/0_stateless/00990_hasToken.sh create mode 100644 dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference create mode 100644 dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index 5e78ff23df1..fecf1a7ca81 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -23,6 +25,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_PARAMETER; + extern const int BAD_ARGUMENTS; } @@ -157,7 +160,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { static const Poco::UTF8Encoding utf8; @@ -374,7 +377,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -567,7 +570,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * 
/*haystack_end*/, const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -697,11 +700,81 @@ public: } }; +// Searches for needle surrounded by token-separators. +// Separators are anything inside ASCII (0-128) and not alphanum. +// Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings +// should work just fine. But any Unicode whitespace is not considered a token separtor. +template +class TokenSearcher +{ + StringSearcher searcher; + size_t needle_size; + +public: + TokenSearcher(const char * const needle_, const size_t needle_size_) + : searcher{needle_, needle_size_}, + needle_size(needle_size_) + { + if (std::any_of(reinterpret_cast(needle_), reinterpret_cast(needle_) + needle_size_, isTokenSeparator)) + { + throw Exception{"needle must not contain whitespace characters", ErrorCodes::BAD_ARGUMENTS}; + } + + } + + ALWAYS_INLINE bool compare(const UInt8 * haystack, const UInt8 * haystack_end, const UInt8 * pos) const + { + // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token. + if (isToken(haystack, haystack_end, pos)) + return searcher.compare(haystack, haystack_end, pos); + + return false; + } + + const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const + { + // use searcher.search(), then verify that returned value is a token + // if it is not, skip it and re-run + + const UInt8 * pos = haystack; + while (pos < haystack_end) + { + pos = searcher.search(pos, haystack_end); + if (pos == haystack_end || isToken(haystack, haystack_end, pos)) + return pos; + + // assuming that heendle does not contain any token separators. + pos += needle_size; + } + return haystack_end; + } + + const UInt8 * search(const UInt8 * haystack, const size_t haystack_size) const + { + return search(haystack, haystack + haystack_size); + } + + ALWAYS_INLINE bool isToken(const UInt8 * haystack, const UInt8 * const haystack_end, const UInt8* p) const + { + return (p == haystack || isTokenSeparator(*(p - 1))) + && (p + needle_size >= haystack_end || isTokenSeparator(*(p + needle_size))); + } + + ALWAYS_INLINE static bool isTokenSeparator(const UInt8 c) + { + if (isAlphaNumericASCII(c) || !isASCII(c)) + return false; + + return true; + } +}; + using ASCIICaseSensitiveStringSearcher = StringSearcher; using ASCIICaseInsensitiveStringSearcher = StringSearcher; using UTF8CaseSensitiveStringSearcher = StringSearcher; using UTF8CaseInsensitiveStringSearcher = StringSearcher; +using ASCIICaseSensitiveTokenSearcher = TokenSearcher; /** Uses functions from libc. diff --git a/dbms/src/Common/Volnitsky.h b/dbms/src/Common/Volnitsky.h index 748cbe09138..c87bdd79dab 100644 --- a/dbms/src/Common/Volnitsky.h +++ b/dbms/src/Common/Volnitsky.h @@ -327,6 +327,8 @@ protected: FallbackSearcher fallback_searcher; public: + using Searcher = FallbackSearcher; + /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified). * If you specify it small enough, the fallback algorithm will be used, * since it is considered that it's useless to waste time initializing the hash table. 
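To summarize the semantics of the TokenSearcher added above: an occurrence only counts when the needle is bounded on both sides by token separators (any ASCII byte that is not alphanumeric) or by the ends of the haystack; bytes outside ASCII are treated as token characters, so multi-byte UTF-8 words are never split, but Unicode whitespace does not act as a separator either, and a needle that itself contains a separator is rejected up front. A self-contained sketch of just that boundary rule, with illustrative names and without the Volnitsky machinery:

#include <cctype>
#include <cstddef>
#include <string_view>

// A byte separates tokens iff it is plain ASCII and not alphanumeric.
bool isTokenSeparatorSketch(unsigned char c)
{
    return c < 128 && !std::isalnum(c);
}

// hasTokenSketch("abc,def,zzz", "def") == true
// hasTokenSketch("abcdefzzz", "def")   == false  (the match is not a whole token)
bool hasTokenSketch(std::string_view haystack, std::string_view needle)
{
    for (std::size_t pos = haystack.find(needle); pos != std::string_view::npos;
         pos = haystack.find(needle, pos + 1))
    {
        bool left_boundary = pos == 0 || isTokenSeparatorSketch(haystack[pos - 1]);
        bool right_boundary = pos + needle.size() == haystack.size()
            || isTokenSeparatorSketch(haystack[pos + needle.size()]);
        if (left_boundary && right_boundary)
            return true;
    }
    return false;
}

VolnitskyToken (declared further down) plugs the real searcher in as the Volnitsky fallback, so the hash-based scan still finds candidate positions and only those candidates pay for the boundary check.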
@@ -373,7 +375,7 @@ public: const auto res = pos - (hash[cell_num] - 1); /// pointer in the code is always padded array so we can use pagesafe semantics - if (fallback_searcher.compare(res)) + if (fallback_searcher.compare(haystack, haystack_end, res)) return res; } } @@ -520,7 +522,7 @@ public: { const auto res = pos - (hash[cell_num].off - 1); const size_t ind = hash[cell_num].id; - if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res)) + if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res)) return true; } } @@ -552,7 +554,7 @@ public: { const auto res = pos - (hash[cell_num].off - 1); const size_t ind = hash[cell_num].id; - if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res)) + if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res)) ans = std::min(ans, ind); } } @@ -590,7 +592,7 @@ public: { const auto res = pos - (hash[cell_num].off - 1); const size_t ind = hash[cell_num].id; - if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res)) + if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res)) ans = std::min(ans, res - haystack); } } @@ -625,7 +627,7 @@ public: { const auto * res = pos - (hash[cell_num].off - 1); const size_t ind = hash[cell_num].id; - if (ans[ind] == 0 && res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res)) + if (ans[ind] == 0 && res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res)) ans[ind] = count_chars(haystack, res); } } @@ -650,6 +652,8 @@ using VolnitskyUTF8 = VolnitskyBase; /// ignores non-ASCII bytes using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase; +using VolnitskyToken = VolnitskyBase; + using MultiVolnitsky = MultiVolnitskyBase; using MultiVolnitskyUTF8 = MultiVolnitskyBase; using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase; diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 726eb8738af..5d688232bd4 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -434,6 +434,74 @@ struct MultiSearchFirstIndexImpl } }; +/** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. + */ +template +struct HasTokenImpl +{ + using ResultType = UInt8; + + static void vector_constant( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray & res) + { + if (offsets.empty()) + return; + + const UInt8 * begin = data.data(); + const UInt8 * pos = begin; + const UInt8 * end = pos + data.size(); + + /// The current index in the array of strings. + size_t i = 0; + + VolnitskyToken searcher(pattern.data(), pattern.size(), end - pos); + + /// We will search for the next occurrence in all rows at once. + while (pos < end && end != (pos = searcher.search(pos, end - pos))) + { + /// Let's determine which index it refers to. + while (begin + offsets[i] <= pos) + { + res[i] = negate_result; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + offsets[i]) + res[i] = !negate_result; + else + res[i] = negate_result; + + pos = begin + offsets[i]; + ++i; + } + + /// Tail, in which there can be no substring. 
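+        /// (Sketch of the column layout this loop relies on, with made-up example values:
+        ///  a ColumnString keeps every row concatenated in `data`, each row followed by a
+        ///  terminating zero byte, and `offsets[i]` pointing one past that zero, e.g.
+        ///      rows    = "ab", "cde"
+        ///      data    = 'a' 'b' '\0' 'c' 'd' 'e' '\0'
+        ///      offsets = 3, 7
+        ///  The searcher runs once over the whole buffer; each hit is attributed to the first
+        ///  row whose end offset lies past it, and is discarded if the match would cross that
+        ///  row's end. Rows left after the final hit cannot contain the needle at all, which
+        ///  is what the memset below records.)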
+ if (i < res.size()) + memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0])); + } + + static void constant_constant(const std::string & data, const std::string & pattern, UInt8 & res) + { + VolnitskyToken searcher(pattern.data(), pattern.size(), data.size()); + const auto found = searcher.search(data.c_str(), data.size()) != data.end().base(); + res = negate_result ^ found; + } + + template + static void vector_vector(Args &&...) + { + throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + } + + /// Search different needles in single haystack. + template + static void constant_vector(Args &&...) + { + throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + } +}; + struct NamePosition { @@ -516,6 +584,11 @@ struct NameMultiSearchFirstPositionCaseInsensitiveUTF8 static constexpr auto name = "multiSearchFirstPositionCaseInsensitiveUTF8"; }; +struct NameHasToken +{ + static constexpr auto name = "hasToken"; +}; + using FunctionPosition = FunctionsStringSearch, NamePosition>; using FunctionPositionUTF8 = FunctionsStringSearch, NamePositionUTF8>; @@ -542,6 +615,7 @@ using FunctionMultiSearchFirstPositionUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitiveUTF8>; +using FunctionHasToken = FunctionsStringSearch, NameHasToken>; void registerFunctionsStringSearch(FunctionFactory & factory) { @@ -570,6 +644,8 @@ void registerFunctionsStringSearch(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 3625c6f1aa5..246ad6784b2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -168,6 +168,19 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map return true; } }, + { + "hasToken", + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + const auto & str = value.get(); + stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, *out.bloom_filter); + return true; + } + }, { "startsWith", [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) diff --git a/dbms/tests/queries/0_stateless/00990_hasToken.python b/dbms/tests/queries/0_stateless/00990_hasToken.python new file mode 100755 index 00000000000..217d96dfe52 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00990_hasToken.python @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# encoding: utf-8 + +import re + +HAYSTACKS = [ + "hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay needle", + "needle hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay", + "hay hay hay hay hay hay hay hay hay needle hay hay hay hay hay hay hay hay hay", +] + +NEEDLE = "needle" + +HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE) +NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE) + +def replace_follow_case(replacement): + def func(match): + g = match.group() + 
if g.islower(): return replacement.lower() + if g.istitle(): return replacement.title() + if g.isupper(): return replacement.upper() + return replacement + return func + +def replace_separators(query, new_sep): + SEP_RE = re.compile('\\s+') + result = SEP_RE.sub(new_sep, query) + return result + +def enlarge_haystack(query, times, separator=''): + return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query) + +def small_needle(query): + return NEEDLE_RE.sub(replace_follow_case('n'), query) + +def remove_needle(query): + return NEEDLE_RE.sub('', query) + +def replace_needle(query, new_needle): + return NEEDLE_RE.sub(new_needle, query) + +# with str.lower, str.uppert, str.title and such +def transform_needle(query, string_transformation_func): + def replace_with_transformation(match): + g = match.group() + return string_transformation_func(g) + + return NEEDLE_RE.sub(replace_with_transformation, query) + + +def create_cases(table_row_template, table_query_template, const_query_template): + const_queries = [] + table_rows = [] + table_queries = set() + + def add_case(haystack, needle, match): + match = int(match) + const_queries.append(const_query_template.format(haystack=haystack, needle=needle, match=match)) + table_queries.add(table_query_template.format(haystack=haystack, needle=needle, match=match)) + table_rows.append(table_row_template.format(haystack=haystack, needle=needle, match=match)) + + # Negative cases + add_case(remove_needle(HAYSTACKS[0]), NEEDLE, False) + for haystack in HAYSTACKS: + add_case(transform_needle(haystack, str.title), NEEDLE, False) + sep = '' + h = replace_separators(haystack, sep) + add_case(h, NEEDLE, False) + add_case(small_needle(h), small_needle(NEEDLE), False) + add_case(enlarge_haystack(h, 10, sep), NEEDLE, False) + + # positive cases + for haystack in HAYSTACKS: + add_case(transform_needle(haystack, str.title), transform_needle(NEEDLE, str.title), True) + add_case(transform_needle(haystack, str.upper), transform_needle(NEEDLE, str.upper), True) + + # Not checking all separators since some (like ' and \n) cause issues when coupled with + # re-based replacement and quoting in query + # other are rare in practice and checking all separators makes this test too lengthy. 
+ + # r'\\\\' turns into a single '\' in query + #separators = list(''' \t`~!@#$%^&*()-=+|]}[{";:/?.>,<''') + [r'\\\\'] + separators = list(''' \t;:?.,''') + [r'\\\\'] + for sep in separators: + h = replace_separators(haystack, sep) + add_case(h, NEEDLE, True) + add_case(small_needle(h), small_needle(NEEDLE), True) + add_case(enlarge_haystack(h, 200, sep), NEEDLE, True) + add_case(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True) + add_case(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True) + + return table_rows, table_queries, const_queries + +def main(): + + def query(x): + print x + + CONST_QUERY = """SELECT hasToken('{haystack}', '{needle}'), ' expecting ', {match};""" + #SELECT hasToken(haystack, '{needle}') FROM ht WHERE needle = '{needle}' AND match = {match};""" + TABLE_QUERY = """WITH '{needle}' as n SELECT haystack, needle, hasToken(haystack, n) as result FROM ht WHERE needle = n AND result != match;""" + TABLE_ROW = """('{haystack}', '{needle}', {match})""" + + rows, table_queries, const_queries = create_cases(TABLE_ROW, TABLE_QUERY, CONST_QUERY) + for q in const_queries: + query(q) + + query("""DROP TABLE IF EXISTS ht; + CREATE TABLE IF NOT EXISTS + ht +( + haystack String, + needle String, + match UInt8 +) +ENGINE MergeTree() +ORDER BY haystack; +INSERT INTO ht VALUES {values};""".format(values=", ".join(rows))) + for q in sorted(table_queries): + query(q) + +if __name__ == '__main__': + main() diff --git a/dbms/tests/queries/0_stateless/00990_hasToken.reference b/dbms/tests/queries/0_stateless/00990_hasToken.reference new file mode 100644 index 00000000000..867c0c1c691 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00990_hasToken.reference @@ -0,0 +1,139 @@ +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +0 expecting 0 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 
expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 +1 expecting 1 diff --git a/dbms/tests/queries/0_stateless/00990_hasToken.sh b/dbms/tests/queries/0_stateless/00990_hasToken.sh new file mode 100755 index 00000000000..4ccb77b8ecc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00990_hasToken.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python $CURDIR/00990_hasToken.python | ${CLICKHOUSE_CLIENT} -nm diff --git a/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference new file mode 100644 index 00000000000..10e8f0d2c59 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference @@ -0,0 +1,3 @@ +2007 +2007 +2007 diff --git a/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql new file mode 100644 index 00000000000..60e4d959417 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql @@ -0,0 +1,33 @@ +SET allow_experimental_data_skipping_indices = 1; + +DROP TABLE IF EXISTS bloom_filter; + +CREATE TABLE bloom_filter +( + id UInt64, + s String, + INDEX tok_bf (s, lower(s)) TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1 +) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 8; + +insert into bloom_filter select number, 'yyy,uuu' from numbers(1024); +insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8); +insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024); +insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024); + +set max_rows_to_read = 16; + +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc'); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def'); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz'); + +-- invert result +-- this does not work as expected, reading more rows that it should +-- SELECT max(id) FROM bloom_filter WHERE NOT hasToken(s, 'yyy'); + +-- accessing to many rows +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'yyy'); -- { serverError 158 } + +-- this syntax is not supported by tokenbf +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz') == 1; -- { serverError 158 } + +DROP TABLE bloom_filter; \ No newline at end of file From d001c7e554ccedfba05b271a91687e918ab4e6d3 Mon Sep 17 00:00:00 2001 From: Alex Ryndin Date: Fri, 23 Aug 2019 17:27:07 +0300 Subject: [PATCH 33/59] Fix typo (#6631) --- docs/en/operations/settings/query_complexity.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 77699c868b4..c00f2132ebd 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -79,7 +79,7 @@ Enables or disables execution of `GROUP BY` clauses in external memory. See [GRO Possible values: -- Maximum volume or RAM (in bytes) that can be used by the single [GROUP BY](../../query_language/select.md#select-group-by-clause) operation. 
+- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../query_language/select.md#select-group-by-clause) operation. - 0 — `GROUP BY` in external memory disabled. Default value: 0. From f757de462584ec8cad5d6cba4c73f3fe2b480324 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Aug 2019 17:56:38 +0300 Subject: [PATCH 34/59] Revert "Translate database engine documentation(zh) (#6625)" This reverts commit baf121c864550cdcafdcd70b9b2ab36afc7e9c53. --- docs/zh/database_engines/index.md | 12 +- docs/zh/database_engines/mysql.md | 125 +----------------- docs/zh/operations/table_engines/mergetree.md | 2 +- 3 files changed, 3 insertions(+), 136 deletions(-) diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md index f8ae05e2520..bbdb762a4ad 120000 --- a/docs/zh/database_engines/index.md +++ b/docs/zh/database_engines/index.md @@ -1,11 +1 @@ -# 数据库引擎 - -您使用的所有表都是由数据库引擎所提供的 - -默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../operations/table_engines/index.md)和[所有支持的SQL语法](../query_language/syntax.md). - -除此之外,您还可以选择使用以下的数据库引擎: - -- [MySQL](mysql.md) - -[来源文章](https://clickhouse.yandex/docs/en/database_engines/) +../../en/database_engines/index.md \ No newline at end of file diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md index 38dfcb5ef64..51ac4126e2d 120000 --- a/docs/zh/database_engines/mysql.md +++ b/docs/zh/database_engines/mysql.md @@ -1,124 +1 @@ -# MySQL - -MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并允许您对表进行`INSERT`和`SELECT`查询,以方便您在ClickHouse与MySQL之间进行数据交换。 - -`MySQL`数据库引擎会将对其的查询转换为MySQL语法并发送到MySQL服务器中,因此您可以执行诸如`SHOW TABLES`或`SHOW CREATE TABLE`之类的操作。 - -但您无法对其执行以下操作: - -- `ATTACH`/`DETACH` -- `DROP` -- `RENAME` -- `CREATE TABLE` -- `ALTER` - - -## CREATE DATABASE - -``` sql -CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') -``` - -**MySQL数据库引擎参数** - -- `host:port` — 链接的MySQL地址。 -- `database` — 链接的MySQL数据库。 -- `user` — 链接的MySQL用户。 -- `password` — 链接的MySQL用户密码。 - - -## 支持的类型对应 - -MySQL | ClickHouse -------|------------ -UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) -TINYINT | [Int8](../data_types/int_uint.md) -UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) -SMALLINT | [Int16](../data_types/int_uint.md) -UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) -INT, MEDIUMINT | [Int32](../data_types/int_uint.md) -UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) -BIGINT | [Int64](../data_types/int_uint.md) -FLOAT | [Float32](../data_types/float.md) -DOUBLE | [Float64](../data_types/float.md) -DATE | [Date](../data_types/date.md) -DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) -BINARY | [FixedString](../data_types/fixedstring.md) - -其他的MySQL数据类型将全部都转换为[String](../data_types/string.md)。 - -同时以上的所有类型都支持[Nullable](../data_types/nullable.md)。 - - -## 使用示例 - -在MySQL中创建表: - -``` -mysql> USE test; -Database changed - -mysql> CREATE TABLE `mysql_table` ( - -> `int_id` INT NOT NULL AUTO_INCREMENT, - -> `float` FLOAT NOT NULL, - -> PRIMARY KEY (`int_id`)); -Query OK, 0 rows affected (0,09 sec) - -mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2); -Query OK, 1 row affected (0,00 sec) - -mysql> select * from mysql_table; -+--------+-------+ -| int_id | value | -+--------+-------+ -| 1 | 2 | -+--------+-------+ -1 row in set (0,00 sec) -``` - -在ClickHouse中创建MySQL类型的数据库,同时与MySQL服务器交换数据: - -```sql -CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 
'user_password') -``` -```sql -SHOW DATABASES -``` -```text -┌─name─────┐ -│ default │ -│ mysql_db │ -│ system │ -└──────────┘ -``` -```sql -SHOW TABLES FROM mysql_db -``` -```text -┌─name─────────┐ -│ mysql_table │ -└──────────────┘ -``` -```sql -SELECT * FROM mysql_db.mysql_table -``` -```text -┌─int_id─┬─value─┐ -│ 1 │ 2 │ -└────────┴───────┘ -``` -```sql -INSERT INTO mysql_db.mysql_table VALUES (3,4) -``` -```sql -SELECT * FROM mysql_db.mysql_table -``` -```text -┌─int_id─┬─value─┐ -│ 1 │ 2 │ -│ 3 │ 4 │ -└────────┴───────┘ -``` - -[来源文章](https://clickhouse.yandex/docs/en/database_engines/mysql/) +../../en/database_engines/mysql.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md index 5e330164c5a..5ddf837708a 100644 --- a/docs/zh/operations/table_engines/mergetree.md +++ b/docs/zh/operations/table_engines/mergetree.md @@ -48,7 +48,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] **子句** -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`。 `MergeTree` 引擎不需要其他参数。 +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 - `PARTITION BY` — [分区键](custom_partitioning_key.md) 。 From abdd70fcc4b02297618c8ed29751e6bbff917fa1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:01:36 +0300 Subject: [PATCH 35/59] Fixed "splitted" build --- contrib/arrow-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index bc229deeced..843ff9cd8af 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -219,7 +219,7 @@ endif() add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) add_dependencies(${ARROW_LIBRARY} protoc) target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS}) -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${PROTOBUF_LIBRARIES} Threads::Threads) +target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY} Threads::Threads) if (ARROW_WITH_LZ4) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY}) endif() From 75e124f3909d1a6820938986f9d7f82b05acb309 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:05:27 +0300 Subject: [PATCH 36/59] Removed misleading flag from CMake --- CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ac4d67f6ae..f84a181a39c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,11 +264,10 @@ if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") endif () -# TODO: only make this extra-checks in CI builds, since a lot of contrib libs won't link - -# CI works around this problem by explicitly adding GLIBC_COMPATIBILITY flag. -if (NOT SANITIZE AND YANDEX_OFFICIAL_BUILD) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") +# Make this extra-checks for correct library dependencies. 
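+# (-Wl,--no-undefined turns any unresolved symbol in a shared library into a link-time
+# error instead of a load-time one, so with this change broken library dependencies are
+# caught in every non-sanitizer build rather than only in official builds.)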
+if (NOT SANITIZE) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") endif () include (cmake/find_unwind.cmake) From 5fd649e663e126813b074f2c819baca8a6eedc7c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:10:33 +0300 Subject: [PATCH 37/59] Check for broken symlinks #6625 --- utils/check-style/check-style | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index fed4b6b8670..ef3bf6cfad4 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -25,3 +25,6 @@ find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | grep -vP 'Compiler|build' | xargs grep $@ -P '}\s*//+\s*namespace\s*' + +# Broken symlinks +find -L $ROOT_PATH -type l | grep -v contrib && echo "^ Broken symlinks found" From 016b1ee2f77646806d95aa28b562ab7c8e0fd41e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 18:42:45 +0300 Subject: [PATCH 38/59] Increase timeout for "stack overflow" test because it may take a long time in debug build --- dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh | 2 ++ dbms/tests/queries/shell_config.sh | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh b/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh index 64fae3fb0f9..14f2a8e31fb 100755 --- a/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh +++ b/dbms/tests/queries/0_stateless/00984_parser_stack_overflow.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +CLICKHOUSE_CURL_TIMEOUT=30 + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh diff --git a/dbms/tests/queries/shell_config.sh b/dbms/tests/queries/shell_config.sh index d4ab11be927..b3058a6cdbe 100644 --- a/dbms/tests/queries/shell_config.sh +++ b/dbms/tests/queries/shell_config.sh @@ -46,7 +46,8 @@ export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:="9009"} export CLICKHOUSE_URL_INTERSERVER=${CLICKHOUSE_URL_INTERSERVER:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_INTERSERVER}/"} export CLICKHOUSE_CURL_COMMAND=${CLICKHOUSE_CURL_COMMAND:="curl"} -export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} --max-time 10"} +export CLICKHOUSE_CURL_TIMEOUT=${CLICKHOUSE_CURL_TIMEOUT:="10"} +export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} --max-time ${CLICKHOUSE_CURL_TIMEOUT}"} export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} From 50b927a9d74ed6de672ec9cc58e230c545c6ce43 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Aug 2019 19:08:27 +0300 Subject: [PATCH 39/59] Update StringSearcher.h --- dbms/src/Common/StringSearcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index fecf1a7ca81..3cb6e56ab78 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -717,7 +717,7 @@ public: { if (std::any_of(reinterpret_cast(needle_), reinterpret_cast(needle_) + needle_size_, isTokenSeparator)) { - throw Exception{"needle must not contain whitespace characters", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"Needle must not contain whitespace or separator characters", ErrorCodes::BAD_ARGUMENTS}; } } From c781e1c6a7640ddf29bb0af52608fa97a1793736 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 23 Aug 2019 19:09:24 +0300 Subject: [PATCH 40/59] Update StringSearcher.h --- dbms/src/Common/StringSearcher.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index 3cb6e56ab78..25287db11f5 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -160,7 +160,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { static const Poco::UTF8Encoding utf8; @@ -377,7 +377,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -570,7 +570,7 @@ public: #endif } - ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * /*haystack*/, const UInt8 * /*haystack_end*/, const UInt8 * pos) const { #ifdef __SSE4_1__ if (pageSafe(pos)) @@ -722,7 +722,7 @@ public: } - ALWAYS_INLINE bool compare(const UInt8 * haystack, const UInt8 * haystack_end, const UInt8 * pos) const + ALWAYS_INLINE bool compare(const UInt8 * haystack, const UInt8 * haystack_end, const UInt8 * pos) const { // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token. 
if (isToken(haystack, haystack_end, pos)) From b42f85e16bd683b1f03fe4a0833370c742e1445b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 21:30:04 +0300 Subject: [PATCH 41/59] Added a check for double whitespaces --- dbms/src/AggregateFunctions/QuantileExact.h | 2 +- dbms/src/DataTypes/DataTypeEnum.cpp | 2 +- dbms/src/Functions/GeoUtils.cpp | 2 +- .../Formats/Impl/ProtobufRowInputFormat.cpp | 2 +- .../MergeTree/MergeTreeRangeReader.cpp | 2 +- .../Storages/MergeTree/MergeTreeRangeReader.h | 2 +- .../Storages/System/StorageSystemNumbers.cpp | 2 +- utils/check-style/check-style | 3 ++ utils/check-style/double-whitespaces.pl | 33 +++++++++++++++++++ 9 files changed, 43 insertions(+), 7 deletions(-) create mode 100755 utils/check-style/double-whitespaces.pl diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index 5a1343b1399..4a2aa574ae9 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -176,7 +176,7 @@ struct QuantileExactExclusive : public QuantileExact } }; -/// QuantileExactInclusive is equivalent to Excel PERCENTILE and PERCENTILE.INC, R-7, SciPy-(1,1) +/// QuantileExactInclusive is equivalent to Excel PERCENTILE and PERCENTILE.INC, R-7, SciPy-(1,1) template struct QuantileExactInclusive : public QuantileExact { diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index cffc29feaf8..add7052195a 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -234,7 +234,7 @@ void DataTypeEnum::deserializeBinaryBulk( } template -void DataTypeEnum::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +void DataTypeEnum::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { if (value_index) return; diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index 5134343dae0..847d934c6b4 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -18,7 +18,7 @@ const UInt8 geohash_base32_decode_lookup_table[256] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 12, 13, 14, 15, 16, 0xFF, 17, 18, 0xFF, 19, 20, 0xFF, diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 25fecc5c642..1cd9d329c9d 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -12,7 +12,7 @@ namespace DB ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_) : IRowInputFormat(header_, in_, params_) - , data_types(header_.getDataTypes()) + , data_types(header_.getDataTypes()) , reader(in, 
ProtobufSchemas::instance().getMessageTypeForFormatSchema(info_), header_.getNames()) { } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 2aae847217e..932721eb028 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -108,7 +108,7 @@ size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) MergeTreeRangeReader::Stream::Stream( - size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader_) + size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader_) : current_mark(from_mark), offset_after_current_mark(0) , last_mark(to_mark) , merge_tree_reader(merge_tree_reader_) diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index 9552373901c..0eae69ee17e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -41,7 +41,7 @@ public: { public: DelayedStream() = default; - DelayedStream(size_t from_mark, MergeTreeReader * merge_tree_reader); + DelayedStream(size_t from_mark, MergeTreeReader * merge_tree_reader); /// Read @num_rows rows from @from_mark starting from @offset row /// Returns the number of rows added to block. diff --git a/dbms/src/Storages/System/StorageSystemNumbers.cpp b/dbms/src/Storages/System/StorageSystemNumbers.cpp index 2afe2a7c018..2f155e22a11 100644 --- a/dbms/src/Storages/System/StorageSystemNumbers.cpp +++ b/dbms/src/Storages/System/StorageSystemNumbers.cpp @@ -146,7 +146,7 @@ BlockInputStreams StorageSystemNumbers::read( res[i] = std::make_shared(max_block_size, offset + i * max_block_size, num_streams * max_block_size); if (limit) /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. - res[i] = std::make_shared(res[i], *limit * (i + 1) / num_streams - *limit * i / num_streams, 0, false, true); + res[i] = std::make_shared(res[i], *limit * (i + 1) / num_streams - *limit * i / num_streams, 0, false, true); } return res; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index ef3bf6cfad4..deed481f043 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -28,3 +28,6 @@ find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | # Broken symlinks find -L $ROOT_PATH -type l | grep -v contrib && echo "^ Broken symlinks found" + +# Double whitespaces +find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | while read i; do $ROOT_PATH/utils/check-style/double-whitespaces.pl < $i || echo -e "^ File $i contains double whitespaces\n"; done diff --git a/utils/check-style/double-whitespaces.pl b/utils/check-style/double-whitespaces.pl new file mode 100755 index 00000000000..47b03cb74ab --- /dev/null +++ b/utils/check-style/double-whitespaces.pl @@ -0,0 +1,33 @@ +#!/usr/bin/perl + +use strict; + +# Find double whitespace such as "a, b, c" that looks very ugly and annoying. +# But skip double whitespaces if they are used as an alignment - by comparing to surrounding lines. 
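+# Illustrative example: a line such as
+#     memcpy(dst,  src, size);
+# is reported, while double spaces that merely line up with content on the neighbouring lines
+# (hand-made alignment) or rows of a numeric table such as "1,  2,  30,  4," are skipped.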
+ +my @array; + +while (<>) +{ + push @array, $_; +} + +my $ret = 0; + +for (my $i = 1; $i < $#array; ++$i) +{ + if ($array[$i] =~ ',( {2,3})[^ /]') + { + # https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl + + if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment + && (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) + && $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 } + { + print(($i + 1) . ":" . $array[$i]); + $ret = 1; + } + } +} + +exit $ret; From a21b43913fd97d0a97232764ad2f338da93a9561 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 23 Aug 2019 21:40:42 +0300 Subject: [PATCH 42/59] fix crash in OptimizedRegularExpression --- dbms/src/Common/OptimizedRegularExpression.cpp | 5 +++-- .../0_stateless/00997_extract_all_crash_6627.reference | 1 + .../queries/0_stateless/00997_extract_all_crash_6627.sql | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference create mode 100644 dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql diff --git a/dbms/src/Common/OptimizedRegularExpression.cpp b/dbms/src/Common/OptimizedRegularExpression.cpp index c87d87fc2df..3a224709447 100644 --- a/dbms/src/Common/OptimizedRegularExpression.cpp +++ b/dbms/src/Common/OptimizedRegularExpression.cpp @@ -1,4 +1,5 @@ #include +#include #include #define MIN_LENGTH_FOR_STRSTR 3 @@ -413,9 +414,9 @@ unsigned OptimizedRegularExpressionImpl::match(const char * subject return 0; } - StringPieceType pieces[MAX_SUBPATTERNS]; + DB::PODArrayWithStackMemory pieces(limit); - if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces, limit)) + if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces.data(), pieces.size())) return 0; else { diff --git a/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference new file mode 100644 index 00000000000..acb53e80e6d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.reference @@ -0,0 +1 @@ +['9'] diff --git a/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql new file mode 100644 index 00000000000..06de4ec8afb --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_extract_all_crash_6627.sql @@ -0,0 +1 @@ +SELECT extractAll('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 YaBrowser/19.7.2.455 Yowser/2.5 Safari/537.36', '[Y][a-zA-Z]{8}/[1-9]([1-9]+)?(((.?)([0-9]+)?){0,4})?'); From 7dffa0fe9fbca08d6f25bc83b53fdee1ffeadbf5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 23 Aug 2019 22:19:36 +0300 Subject: [PATCH 43/59] added wait for mutation to indices tests --- dbms/tests/queries/0_stateless/00942_mutate_index.sh | 4 ++-- dbms/tests/queries/0_stateless/00943_materialize_index.sh | 6 ++++++ .../queries/0_stateless/00944_clear_index_in_partition.sh | 3 ++- .../00975_indices_mutation_replicated_zookeeper.sh | 7 +++++-- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00942_mutate_index.sh b/dbms/tests/queries/0_stateless/00942_mutate_index.sh index c6dd1dfb836..467eb9ab671 100755 --- a/dbms/tests/queries/0_stateless/00942_mutate_index.sh +++ 
b/dbms/tests/queries/0_stateless/00942_mutate_index.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" @@ -35,8 +36,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 1;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 5;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx UPDATE i64 = 5 WHERE i64 = 1;" - -sleep 0.1 +wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 1;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 5;" diff --git a/dbms/tests/queries/0_stateless/00943_materialize_index.sh b/dbms/tests/queries/0_stateless/00943_materialize_index.sh index f51b66993aa..ab2fd1e5355 100755 --- a/dbms/tests/queries/0_stateless/00943_materialize_index.sh +++ b/dbms/tests/queries/0_stateless/00943_materialize_index.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" @@ -38,22 +39,27 @@ SET allow_experimental_data_skipping_indices=1; ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" +wait_for_mutation "minmax_idx" "mutation_1.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" +wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 1;" +wait_for_mutation "minmax_idx" "mutation_3.txt" "test" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 2;" +wait_for_mutation "minmax_idx" "mutation_4.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx;" +wait_for_mutation "minmax_idx" "mutation_5.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 9047bbb3a72..7d68bac8c83 100755 --- a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. 
$CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" @@ -42,7 +43,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -sleep 0.5 +wait_for_mutation "minmax_idx" "mutation_1.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh index 613226a3fb7..1bcb4f17edd 100755 --- a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh @@ -2,6 +2,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/mergetree_mutations.lib $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.indices_mutaions1;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.indices_mutaions2;" @@ -47,13 +48,15 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;" -sleep 1 +wait_for_mutation "indices_mutaions1" "mutation_1.txt" "test" +wait_for_mutation "indices_mutaions2" "mutation_1.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 MATERIALIZE INDEX idx IN PARTITION 1;" -sleep 1 +wait_for_mutation "indices_mutaions1" "mutation_2.txt" "test" +wait_for_mutation "indices_mutaions2" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" From ba2d17c12a68ede2bb47e34fdfbdd6ea0ec33a8e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 23 Aug 2019 22:36:17 +0300 Subject: [PATCH 44/59] fix --- .../0_stateless/00943_materialize_index.sh | 17 ++++++++--------- .../00944_clear_index_in_partition.sh | 2 +- ...975_indices_mutation_replicated_zookeeper.sh | 3 +-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00943_materialize_index.sh b/dbms/tests/queries/0_stateless/00943_materialize_index.sh index ab2fd1e5355..feab59b368e 100755 --- a/dbms/tests/queries/0_stateless/00943_materialize_index.sh +++ b/dbms/tests/queries/0_stateless/00943_materialize_index.sh @@ -39,27 +39,26 @@ SET allow_experimental_data_skipping_indices=1; ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_1.txt" "test" - -$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" -$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 
FORMAT JSON" | grep "rows_read" - -$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 1;" +$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" wait_for_mutation "minmax_idx" "mutation_3.txt" "test" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" + +$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx CLEAR INDEX idx IN PARTITION 2;" -wait_for_mutation "minmax_idx" "mutation_4.txt" "test" +sleep 0.5 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx;" -wait_for_mutation "minmax_idx" "mutation_5.txt" "test" +wait_for_mutation "minmax_idx" "mutation_4.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 7d68bac8c83..5a7bdd8e3ae 100755 --- a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -43,7 +43,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_1.txt" "test" +wait_for_mutation "minmax_idx" "mutation_2.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh index 1bcb4f17edd..5e6159475f8 100755 --- a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh @@ -48,8 +48,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;" -wait_for_mutation "indices_mutaions1" "mutation_1.txt" "test" -wait_for_mutation "indices_mutaions2" "mutation_1.txt" "test" +sleep 0.5 $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 
= 2 FORMAT JSON" | grep "rows_read" From d38e9ee229cede07348b2531c3949a9f11b9abe3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Aug 2019 23:32:31 +0300 Subject: [PATCH 45/59] Fixed "trim" functions (in progress) --- dbms/src/Functions/trim.cpp | 64 ++---- libs/libcommon/include/common/find_symbols.h | 198 +++++++++++++------ 2 files changed, 157 insertions(+), 105 deletions(-) diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index f2e2543cc90..81916604d63 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -1,10 +1,8 @@ #include #include #include +#include -#ifdef __SSE4_2__ -#include -#endif namespace DB { @@ -60,7 +58,7 @@ public: execute(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); res_data.resize(res_data.size() + length + 1); - memcpy(&res_data[res_offset], start, length); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length); res_offset += length + 1; res_data[res_offset - 1] = '\0'; @@ -77,59 +75,27 @@ public: private: static void execute(const UInt8 * data, size_t size, const UInt8 *& res_data, size_t & res_size) { - size_t chars_to_trim_left = 0; - size_t chars_to_trim_right = 0; - char whitespace = ' '; -#ifdef __SSE4_2__ - const auto bytes_sse = sizeof(__m128i); - const auto size_sse = size - (size % bytes_sse); - const auto whitespace_mask = _mm_set1_epi8(whitespace); - constexpr auto base_sse_mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY; - auto mask = bytes_sse; -#endif + const char * char_data = reinterpret_cast(data); + const char * char_end = char_data + size; if constexpr (mode::trim_left) { -#ifdef __SSE4_2__ - /// skip whitespace from left in blocks of up to 16 characters - - /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate - enum { left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT }; - while (mask == bytes_sse && chars_to_trim_left < size_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(data + chars_to_trim_left)); - mask = _mm_cmpistri(whitespace_mask, chars, left_sse_mode); - chars_to_trim_left += mask; - } -#endif - /// skip remaining whitespace from left, character by character - while (chars_to_trim_left < size && data[chars_to_trim_left] == whitespace) - ++chars_to_trim_left; + const char * found = find_first_not_symbols<' '>(char_data, char_end); + size_t num_chars = found - char_data; + char_data += num_chars; } - if constexpr (mode::trim_right) + if constexpr (mode::trim_left) { - const auto trim_right_size = size - chars_to_trim_left; -#ifdef __SSE4_2__ - /// try to skip whitespace from right in blocks of up to 16 characters - - /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate - enum { right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT }; - const auto trim_right_size_sse = trim_right_size - (trim_right_size % bytes_sse); - while (mask == bytes_sse && chars_to_trim_right < trim_right_size_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(data + size - chars_to_trim_right - bytes_sse)); - mask = _mm_cmpistri(whitespace_mask, chars, right_sse_mode); - chars_to_trim_right += mask; - } -#endif - /// skip remaining whitespace from right, character by character - while (chars_to_trim_right < trim_right_size && data[size - chars_to_trim_right - 1] == whitespace) - ++chars_to_trim_right; + const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end); + if (found) + char_end = found + 1; + else 
+ char_end = char_data; } - res_data = data + chars_to_trim_left; - res_size = size - chars_to_trim_left - chars_to_trim_right; + res_data = reinterpret_cast(char_data); + res_size = char_end - char_data; } }; diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h index 68b49397683..920a7df04c5 100644 --- a/libs/libcommon/include/common/find_symbols.h +++ b/libs/libcommon/include/common/find_symbols.h @@ -65,115 +65,153 @@ inline __m128i mm_is_in(__m128i bytes) } #endif - -template -inline const char * find_first_symbols_sse2(const char * begin, const char * end) +template +bool maybe_negate(bool x) { + if constexpr (positive) + return x; + else + return !x; +} + +template +uint16_t maybe_negate(uint16_t x) +{ + if constexpr (positive) + return x; + else + return ~x; +} + +enum class ReturnMode +{ + End, + Nullptr, +}; + + +template +inline const char * find_first_symbols_sse2(const char * const begin, const char * const end) +{ + const char * pos = begin; + #if defined(__SSE2__) - for (; begin + 15 < end; begin += 16) + for (; pos + 15 < end; pos += 16) { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); __m128i eq = mm_is_in(bytes); - uint16_t bit_mask = _mm_movemask_epi8(eq); + uint16_t bit_mask = maybe_negate(uint16_t(_mm_movemask_epi8(eq))); if (bit_mask) - return begin + __builtin_ctz(bit_mask); + return pos + __builtin_ctz(bit_mask); } #endif - for (; begin < end; ++begin) - if (is_in(*begin)) - return begin; - return end; + for (; pos < end; ++pos) + if (maybe_negate(is_in(*pos))) + return pos; + + return return_mode == ReturnMode::End ? end : nullptr; } -template -inline const char * find_last_symbols_or_null_sse2(const char * begin, const char * end) +template +inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) { + const char * pos = end; + #if defined(__SSE2__) - for (; end - 16 >= begin; end -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. + for (; pos - 16 >= begin; pos -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(end - 16)); + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos - 16)); __m128i eq = mm_is_in(bytes); - uint16_t bit_mask = _mm_movemask_epi8(eq); + uint16_t bit_mask = maybe_negate(uint16_t(_mm_movemask_epi8(eq))); if (bit_mask) - return end - 1 - (__builtin_clz(bit_mask) - 16); /// because __builtin_clz works with mask as uint32. + return pos - 1 - (__builtin_clz(bit_mask) - 16); /// because __builtin_clz works with mask as uint32. } #endif - --end; - for (; end >= begin; --end) - if (is_in(*end)) - return end; + --pos; + for (; pos >= begin; --pos) + if (maybe_negate(is_in(*pos))) + return pos; - return nullptr; + return return_mode == ReturnMode::End ? 
end : nullptr; } -template -inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end) +inline const char * find_first_symbols_sse42_impl(const char * const begin, const char * const end) { + const char * pos = begin; + #if defined(__SSE4_2__) #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); - for (; begin + 15 < end; begin += 16) + for (; pos + 15 < end; pos += 16) { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); - if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE)) - return begin + _mm_cmpestri(set, num_chars, bytes, 16, MODE); + if constexpr (positive) + { + if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE)) + return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE); + } + else + { + if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY)) + return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY); + } } #undef MODE #endif - for (; begin < end; ++begin) - if ( (num_chars >= 1 && *begin == c01) - || (num_chars >= 2 && *begin == c02) - || (num_chars >= 3 && *begin == c03) - || (num_chars >= 4 && *begin == c04) - || (num_chars >= 5 && *begin == c05) - || (num_chars >= 6 && *begin == c06) - || (num_chars >= 7 && *begin == c07) - || (num_chars >= 8 && *begin == c08) - || (num_chars >= 9 && *begin == c09) - || (num_chars >= 10 && *begin == c10) - || (num_chars >= 11 && *begin == c11) - || (num_chars >= 12 && *begin == c12) - || (num_chars >= 13 && *begin == c13) - || (num_chars >= 14 && *begin == c14) - || (num_chars >= 15 && *begin == c15) - || (num_chars >= 16 && *begin == c16)) - return begin; - return end; + for (; pos < end; ++pos) + if ( (num_chars >= 1 && maybe_negate(*pos == c01)) + || (num_chars >= 2 && maybe_negate(*pos == c02)) + || (num_chars >= 3 && maybe_negate(*pos == c03)) + || (num_chars >= 4 && maybe_negate(*pos == c04)) + || (num_chars >= 5 && maybe_negate(*pos == c05)) + || (num_chars >= 6 && maybe_negate(*pos == c06)) + || (num_chars >= 7 && maybe_negate(*pos == c07)) + || (num_chars >= 8 && maybe_negate(*pos == c08)) + || (num_chars >= 9 && maybe_negate(*pos == c09)) + || (num_chars >= 10 && maybe_negate(*pos == c10)) + || (num_chars >= 11 && maybe_negate(*pos == c11)) + || (num_chars >= 12 && maybe_negate(*pos == c12)) + || (num_chars >= 13 && maybe_negate(*pos == c13)) + || (num_chars >= 15 && maybe_negate(*pos == c15)) + || (num_chars >= 16 && maybe_negate(*pos == c16))) + return pos; + return return_mode == ReturnMode::End ? end : nullptr; } -template +template inline const char * find_first_symbols_sse42(const char * begin, const char * end) { - return find_first_symbols_sse42_impl(begin, end); + return find_first_symbols_sse42_impl(begin, end); } /// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do. 
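/// Usage sketch (illustrative; mirrors the rewritten trim functions above): trimming spaces
/// from both ends of a [begin, end) character range with the new helpers:
///
///     const char * begin = data;
///     const char * end = data + size;
///     begin = find_first_not_symbols<' '>(begin, end);
///     if (const char * last = find_last_not_symbols_or_null<' '>(begin, end))
///         end = last + 1;
///     else
///         end = begin;   /// the range contained only spaces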
-template +template inline const char * find_first_symbols_dispatch(const char * begin, const char * end) { #if defined(__SSE4_2__) if (sizeof...(symbols) >= 5) - return find_first_symbols_sse42(begin, end); + return find_first_symbols_sse42(begin, end); else #endif - return find_first_symbols_sse2(begin, end); + return find_first_symbols_sse2(begin, end); } } @@ -182,7 +220,7 @@ inline const char * find_first_symbols_dispatch(const char * begin, const char * template inline const char * find_first_symbols(const char * begin, const char * end) { - return detail::find_first_symbols_dispatch(begin, end); + return detail::find_first_symbols_dispatch(begin, end); } /// Returning non const result for non const arguments. @@ -190,18 +228,66 @@ inline const char * find_first_symbols(const char * begin, const char * end) template inline char * find_first_symbols(char * begin, char * end) { - return const_cast(detail::find_first_symbols_dispatch(begin, end)); + return const_cast(detail::find_first_symbols_dispatch(begin, end)); +} + +template +inline const char * find_first_not_symbols(const char * begin, const char * end) +{ + return detail::find_first_symbols_dispatch(begin, end); +} + +template +inline char * find_first_not_symbols(char * begin, char * end) +{ + return const_cast(detail::find_first_symbols_dispatch(begin, end)); +} + +template +inline const char * find_first_symbols_or_null(const char * begin, const char * end) +{ + return detail::find_first_symbols_dispatch(begin, end); +} + +template +inline char * find_first_symbols_or_null(char * begin, char * end) +{ + return const_cast(detail::find_first_symbols_dispatch(begin, end)); +} + +template +inline const char * find_first_not_symbols_or_null(const char * begin, const char * end) +{ + return detail::find_first_symbols_dispatch(begin, end); +} + +template +inline char * find_first_not_symbols_or_null(char * begin, char * end) +{ + return const_cast(detail::find_first_symbols_dispatch(begin, end)); } template inline const char * find_last_symbols_or_null(const char * begin, const char * end) { - return detail::find_last_symbols_or_null_sse2(begin, end); + return detail::find_last_symbols_sse2(begin, end); } template inline char * find_last_symbols_or_null(char * begin, char * end) { - return const_cast(detail::find_last_symbols_or_null_sse2(begin, end)); + return const_cast(detail::find_last_symbols_sse2(begin, end)); +} + +template +inline const char * find_last_not_symbols_or_null(const char * begin, const char * end) +{ + return detail::find_last_symbols_sse2(begin, end); +} + +template +inline char * find_last_not_symbols_or_null(char * begin, char * end) +{ + return const_cast(detail::find_last_symbols_sse2(begin, end)); } From cdd6dca51771520b70df52550b3ae427b4fc82f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Aug 2019 00:10:26 +0300 Subject: [PATCH 46/59] Remove Compiler --- dbms/programs/CMakeLists.txt | 28 +- dbms/programs/clang/CMakeLists.txt | 38 -- .../clang/Compiler-5.0.0/CMakeLists.txt | 53 -- .../programs/clang/Compiler-5.0.0/LICENSE.TXT | 63 -- .../clang/Compiler-5.0.0/cc1_main.cpp | 242 -------- .../clang/Compiler-5.0.0/cc1as_main.cpp | 540 ----------------- dbms/programs/clang/Compiler-5.0.0/driver.cpp | 519 ---------------- dbms/programs/clang/Compiler-5.0.0/lld.cpp | 23 - dbms/programs/clang/Compiler-5.0.1 | 1 - dbms/programs/clang/Compiler-5.0.2 | 1 - .../clang/Compiler-6.0.0/CMakeLists.txt | 54 -- .../programs/clang/Compiler-6.0.0/LICENSE.TXT | 63 -- .../clang/Compiler-6.0.0/cc1_main.cpp | 242 
-------- .../clang/Compiler-6.0.0/cc1as_main.cpp | 540 ----------------- dbms/programs/clang/Compiler-6.0.0/driver.cpp | 520 ---------------- dbms/programs/clang/Compiler-6.0.0/lld.cpp | 23 - dbms/programs/clang/Compiler-6.0.0svn | 1 - dbms/programs/clang/Compiler-6.0.1 | 1 - .../clang/Compiler-7.0.0/CMakeLists.txt | 49 -- .../clang/Compiler-7.0.0/cc1_main.cpp | 239 -------- .../clang/Compiler-7.0.0/cc1as_main.cpp | 572 ------------------ .../Compiler-7.0.0/cc1gen_reproducer_main.cpp | 196 ------ dbms/programs/clang/Compiler-7.0.0/driver.cpp | 514 ---------------- dbms/programs/clang/Compiler-7.0.0/lld.cpp | 150 ----- .../Compiler-7.0.0bundled/CMakeLists.txt | 49 -- .../clang/Compiler-7.0.0bundled/cc1_main.cpp | 243 -------- .../Compiler-7.0.0bundled/cc1as_main.cpp | 555 ----------------- .../clang/Compiler-7.0.0bundled/driver.cpp | 512 ---------------- .../clang/Compiler-7.0.0bundled/lld.cpp | 10 - dbms/programs/clang/Compiler-7.0.0svn | 1 - dbms/programs/clang/Compiler-7.0.1 | 1 - dbms/programs/clang/clickhouse-clang.cpp | 2 - dbms/programs/clang/clickhouse-lld.cpp | 2 - dbms/programs/clang/copy_headers.sh | 100 --- dbms/programs/main.cpp | 16 - dbms/src/Core/Settings.h | 2 +- dbms/src/Interpreters/Aggregator.cpp | 254 +------- dbms/src/Interpreters/Aggregator.h | 58 +- dbms/src/Interpreters/CMakeLists.txt | 67 -- dbms/src/Interpreters/Compiler.cpp | 326 ---------- dbms/src/Interpreters/Compiler.h | 88 --- dbms/src/Interpreters/Context.cpp | 13 - .../Interpreters/InterpreterSelectQuery.cpp | 4 - dbms/src/Interpreters/SpecializedAggregator.h | 215 ------- dbms/src/Interpreters/config_compile.h.in | 26 - dbms/src/Interpreters/tests/CMakeLists.txt | 3 - dbms/src/Interpreters/tests/compiler_test.cpp | 57 -- .../00281_compile_sizeof_packed.reference | 2 - .../00281_compile_sizeof_packed.sql | 2 - .../00568_compile_catch_throw.reference | 2 - .../0_stateless/00568_compile_catch_throw.sh | 14 - 51 files changed, 8 insertions(+), 7288 deletions(-) delete mode 100644 dbms/programs/clang/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT delete mode 100644 dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-5.0.0/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-5.0.0/lld.cpp delete mode 120000 dbms/programs/clang/Compiler-5.0.1 delete mode 120000 dbms/programs/clang/Compiler-5.0.2 delete mode 100644 dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT delete mode 100644 dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-6.0.0/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-6.0.0/lld.cpp delete mode 120000 dbms/programs/clang/Compiler-6.0.0svn delete mode 120000 dbms/programs/clang/Compiler-6.0.1 delete mode 100644 dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt delete mode 100644 dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0/lld.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt delete mode 100644 
dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp delete mode 100644 dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp delete mode 120000 dbms/programs/clang/Compiler-7.0.0svn delete mode 120000 dbms/programs/clang/Compiler-7.0.1 delete mode 100644 dbms/programs/clang/clickhouse-clang.cpp delete mode 100644 dbms/programs/clang/clickhouse-lld.cpp delete mode 100755 dbms/programs/clang/copy_headers.sh delete mode 100644 dbms/src/Interpreters/Compiler.cpp delete mode 100644 dbms/src/Interpreters/Compiler.h delete mode 100644 dbms/src/Interpreters/SpecializedAggregator.h delete mode 100644 dbms/src/Interpreters/config_compile.h.in delete mode 100644 dbms/src/Interpreters/tests/compiler_test.cpp delete mode 100644 dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference delete mode 100644 dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql delete mode 100644 dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference delete mode 100755 dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 03eba470949..0dcd4d7ab91 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -81,7 +81,6 @@ add_subdirectory (extract-from-config) add_subdirectory (compressor) add_subdirectory (copier) add_subdirectory (format) -add_subdirectory (clang) add_subdirectory (obfuscator) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) @@ -89,9 +88,9 @@ if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) endif () if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_COMPILER_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_COMPILER_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) - target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_COMPILER_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) + add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) + target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} 
${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) + target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() @@ -104,10 +103,6 @@ if (CLICKHOUSE_SPLIT_BINARY) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) endif () - if (USE_EMBEDDED_COMPILER) - list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld) - endif () - set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS}) @@ -115,10 +110,6 @@ if (CLICKHOUSE_SPLIT_BINARY) install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse) else () - if (USE_EMBEDDED_COMPILER) - # before add_executable ! - link_directories (${LLVM_LIBRARY_DIRS}) - endif () add_executable (clickhouse main.cpp) target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils) target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) @@ -154,9 +145,6 @@ else () if (ENABLE_CLICKHOUSE_OBFUSCATOR) clickhouse_target_link_split_lib(clickhouse obfuscator) endif () - if (USE_EMBEDDED_COMPILER) - target_link_libraries(clickhouse PRIVATE clickhouse-compiler-lib) - endif () set (CLICKHOUSE_BUNDLE) if (ENABLE_CLICKHOUSE_SERVER) @@ -213,18 +201,8 @@ else () list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) endif() - # install always because depian package want this files: - add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse) - add_custom_target (clickhouse-lld ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-lld DEPENDS clickhouse) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-clang clickhouse-lld) - install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-clang - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-lld - DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) endif () diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt deleted file mode 100644 index 82f520614f4..00000000000 --- a/dbms/programs/clang/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -if (USE_EMBEDDED_COMPILER) - add_subdirectory ("Compiler-${LLVM_VERSION}") -endif () - -if (CLICKHOUSE_SPLIT_BINARY) - if (USE_EMBEDDED_COMPILER) - link_directories (${LLVM_LIBRARY_DIRS}) - add_executable (clickhouse-clang clickhouse-clang.cpp) - target_link_libraries (clickhouse-clang PRIVATE clickhouse-compiler-lib) - add_executable (clickhouse-lld clickhouse-lld.cpp) - target_link_libraries (clickhouse-lld PRIVATE clickhouse-compiler-lib) - install (TARGETS clickhouse-clang clickhouse-lld RUNTIME DESTINATION 
${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - endif () -endif () - -set (TMP_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/${INTERNAL_COMPILER_HEADERS_RELATIVE}") -# Make and install empty dir for debian package if compiler disabled -add_custom_target (make-headers-directory ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${TMP_HEADERS_DIR}) -install (DIRECTORY ${TMP_HEADERS_DIR} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/${INTERNAL_COMPILER_HEADERS_DIR} COMPONENT clickhouse) -# TODO: fix on macos copy_headers.sh: sed --posix - -if (USE_EMBEDDED_COMPILER) - set (COPY_HEADERS_COMPILER "${CMAKE_CURRENT_BINARY_DIR}/../${INTERNAL_COMPILER_EXECUTABLE}") - set (COPY_HEADERS_DEPENDS clickhouse-clang) -elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}) - set (COPY_HEADERS_COMPILER "${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}") -endif () - -if (COPY_HEADERS_COMPILER) - add_custom_target (copy-headers [ -f ${TMP_HEADERS_DIR}/dbms/src/Interpreters/SpecializedAggregator.h ] || env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} CMAKE_CXX_COMPILER_VERSION=${CMAKE_CXX_COMPILER_VERSION} ${CMAKE_CURRENT_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES copy_headers.sh) - - if (USE_INTERNAL_LLVM_LIBRARY) - set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers") - set (CLANG_HEADERS_DEST "${TMP_HEADERS_DIR}/usr/local/lib/clang/${LLVM_VERSION}/include") # original: ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include - add_custom_target (copy-headers-clang ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} ) - add_dependencies (copy-headers copy-headers-clang) - endif () -endif () diff --git a/dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt b/dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt deleted file mode 100644 index 83e38cea257..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG) - -link_directories(${LLVM_LIBRARY_DIRS}) - -add_library(clickhouse-compiler-lib - driver.cpp - cc1_main.cpp - cc1as_main.cpp - lld.cpp) - -target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti - -llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - -message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") - -target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) - -# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory. 
- -target_link_libraries(clickhouse-compiler-lib PRIVATE - -clangBasic clangCodeGen clangDriver clangFrontend clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic - -lldCOFF -lldDriver -lldELF -#lldMinGW -lldMachO -lldReaderWriter -lldYAML -#lldCommon -lldCore -lldConfig - -${REQUIRED_LLVM_LIBRARIES} - -LLVMSupport - -#Polly -#PollyISL -#PollyPPCG - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT b/dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT deleted file mode 100644 index b452ca2efd8..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/LICENSE.TXT +++ /dev/null @@ -1,63 +0,0 @@ -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2007-2016 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. -This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this -Software. 
- -The following pieces of software have additional or alternate copyrights, -licenses, and/or restrictions: - -Program Directory -------- --------- - - diff --git a/dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp b/dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp deleted file mode 100644 index f6eabaf3387..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/cc1_main.cpp +++ /dev/null @@ -1,242 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -// The amount of stack we think is "sufficient". If less than this much is -// available, we may be unable to reach our template instantiation depth -// limit and other similar limits. -// FIXME: Unify this with the stack we request when spawning a thread to build -// a module. 
-static const int kSufficientStack = 8 << 20; - -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace(int ExtraChunks = 0) { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = kSufficientStack - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < kSufficientStack) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= kSufficientStack) - rlim.rlim_cur = kSufficientStack; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != kSufficientStack) - return; - } - - // We should now have a stack of size at least kSufficientStack. Ensure - // that we can actually use that much, if necessary. - ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. 
- auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp b/dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp deleted file mode 100644 index 2fc2b508ef2..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/cc1as_main.cpp +++ /dev/null @@ -1,540 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - -namespace { - -/// \brief Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. - }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. 
- std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(AssemblerInvocation &Opts, DiagnosticsEngine &Diags, - bool Binary) { - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Opts.OutputPath != "-") - sys::RemoveFileOnSignal(Opts.OutputPath); - - std::error_code EC; - auto Out = llvm::make_unique( - Opts.OutputPath, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Opts.OutputPath - << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - std::unique_ptr FDOS = getOutputStream(Opts, Diags, IsBinary); - if (!FDOS) - return true; - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, CodeModel::Default, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - MCCodeEmitter *CE = nullptr; - MCAsmBackend *MAB = nullptr; - if (Opts.ShowEncoding) { - CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MAB = TheTarget->createMCAsmBackend(*MRI, Opts.Triple, Opts.CPU, Options); - } - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, CE, MAB, Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, Opts.Triple, - Opts.CPU, Options); - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, *MAB, *Out, CE, *STI, Opts.RelaxAll, - Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed && Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. 
- AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-5.0.0/driver.cpp b/dbms/programs/clang/Compiler-5.0.0/driver.cpp deleted file mode 100644 index 5aec2759f9e..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/driver.cpp +++ /dev/null @@ -1,519 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. -// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. 
- if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); - -static void insertTargetAndModeArgs(StringRef Target, StringRef Mode, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - if (!Mode.empty()) { - // Add the mode flag to the arguments. 
- auto it = ArgVector.begin(); - if (it != ArgVector.end()) - ++it; - ArgVector.insert(it, GetStableCStr(SavedStrings, Mode)); - } - - if (!Target.empty()) { - auto it = ArgVector.begin(); - if (it != ArgVector.end()) - ++it; - const char *arr[] = {"-target", GetStableCStr(SavedStrings, Target)}; - ArgVector.insert(it, std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, char **argv_) { - llvm::sys::PrintStackTraceOnErrorSignal(argv_[0]); - llvm::PrettyStackTraceProgram X(argc_, argv_); - llvm::llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - SmallVector argv; - llvm::SpecificBumpPtrAllocator ArgAllocator; - std::error_code EC = llvm::sys::Process::GetArgumentVector( - argv, llvm::makeArrayRef(argv_, argc_), ArgAllocator); - if (EC) { - llvm::errs() << "error: couldn't get arguments: " << EC.message() << '\n'; - return 1; - } - - llvm::InitializeAllTargets(); - std::string ProgName = argv[0]; - std::pair TargetAndMode = - ToolChain::getTargetAndModeFromProgramName(ProgName); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (TargetAndMode.second == "--driver-mode=cl" || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - - insertTargetAndModeArgs(TargetAndMode.first, TargetAndMode.second, argv, - SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. - // On Windows, abort will return an exit code of 3. 
In these cases, - // generate additional diagnostic information if possible. - bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70; -#ifdef LLVM_ON_WIN32 - DiagnoseCrash |= CommandRes == 3; -#endif - if (DiagnoseCrash) { - TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); - break; - } - } - } - - Diags.getClient()->finish(); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - -#ifdef LLVM_ON_WIN32 - // Exit status should not be negative on Win32, unless abnormal termination. - // Once abnormal termiation was caught, negative status should not be - // propagated. - if (Res < 0) - Res = 1; -#endif - - // If we have multiple failing commands, we return the result of the first - // failing command. - return Res; -} diff --git a/dbms/programs/clang/Compiler-5.0.0/lld.cpp b/dbms/programs/clang/Compiler-5.0.0/lld.cpp deleted file mode 100644 index 5af29868864..00000000000 --- a/dbms/programs/clang/Compiler-5.0.0/lld.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "lld/Driver/Driver.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Signals.h" - -using namespace lld; -using namespace llvm; -using namespace llvm::sys; - -int mainEntryClickHouseLLD(int Argc, char **Argv) -{ - // Standard set up, so program fails gracefully. - sys::PrintStackTraceOnErrorSignal(Argv[0]); - PrettyStackTraceProgram StackPrinter(Argc, Argv); - llvm_shutdown_obj Shutdown; - - std::vector Args(Argv, Argv + Argc); - return !elf::link(Args, true); -} diff --git a/dbms/programs/clang/Compiler-5.0.1 b/dbms/programs/clang/Compiler-5.0.1 deleted file mode 120000 index 7c8af57399f..00000000000 --- a/dbms/programs/clang/Compiler-5.0.1 +++ /dev/null @@ -1 +0,0 @@ -Compiler-5.0.0 \ No newline at end of file diff --git a/dbms/programs/clang/Compiler-5.0.2 b/dbms/programs/clang/Compiler-5.0.2 deleted file mode 120000 index 7c8af57399f..00000000000 --- a/dbms/programs/clang/Compiler-5.0.2 +++ /dev/null @@ -1 +0,0 @@ -Compiler-5.0.0 \ No newline at end of file diff --git a/dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt b/dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt deleted file mode 100644 index 4a046674afc..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ - -add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG) - -link_directories(${LLVM_LIBRARY_DIRS}) - -add_library(clickhouse-compiler-lib - driver.cpp - cc1_main.cpp - cc1as_main.cpp - lld.cpp) - -target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti - -llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - -message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") - -target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) - -# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory. 
- -target_link_libraries(clickhouse-compiler-lib PRIVATE - -clangBasic clangCodeGen clangDriver -clangFrontend -clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic - -lldCOFF -lldDriver -lldELF -lldMinGW -lldMachO -lldReaderWriter -lldYAML -lldCommon -lldCore -#lldWasm - -${REQUIRED_LLVM_LIBRARIES} - -#Polly -#PollyISL -#PollyPPCG - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT b/dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT deleted file mode 100644 index b452ca2efd8..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/LICENSE.TXT +++ /dev/null @@ -1,63 +0,0 @@ -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2007-2016 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. -This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this -Software. 
- -The following pieces of software have additional or alternate copyrights, -licenses, and/or restrictions: - -Program Directory -------- --------- - - diff --git a/dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp b/dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp deleted file mode 100644 index f6eabaf3387..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/cc1_main.cpp +++ /dev/null @@ -1,242 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -// The amount of stack we think is "sufficient". If less than this much is -// available, we may be unable to reach our template instantiation depth -// limit and other similar limits. -// FIXME: Unify this with the stack we request when spawning a thread to build -// a module. 
-static const int kSufficientStack = 8 << 20; - -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace(int ExtraChunks = 0) { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = kSufficientStack - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < kSufficientStack) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= kSufficientStack) - rlim.rlim_cur = kSufficientStack; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != kSufficientStack) - return; - } - - // We should now have a stack of size at least kSufficientStack. Ensure - // that we can actually use that much, if necessary. - ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. 
- auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp b/dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp deleted file mode 100644 index caf8409054a..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/cc1as_main.cpp +++ /dev/null @@ -1,540 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - -namespace { - -/// \brief Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. - }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. 
- std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(AssemblerInvocation &Opts, DiagnosticsEngine &Diags, - bool Binary) { - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Opts.OutputPath != "-") - sys::RemoveFileOnSignal(Opts.OutputPath); - - std::error_code EC; - auto Out = llvm::make_unique( - Opts.OutputPath, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Opts.OutputPath - << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - std::unique_ptr FDOS = getOutputStream(Opts, Diags, IsBinary); - if (!FDOS) - return true; - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - MCCodeEmitter *CE = nullptr; - MCAsmBackend *MAB = nullptr; - if (Opts.ShowEncoding) { - CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options); - } - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, CE, MAB, Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); - MCTargetOptions Options; - MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options); - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, std::unique_ptr(MAB), *Out, std::unique_ptr(CE), *STI, - Opts.RelaxAll, Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value = 0; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed && Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. 
- AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0, - /*ShowAllAliases=*/false); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-6.0.0/driver.cpp b/dbms/programs/clang/Compiler-6.0.0/driver.cpp deleted file mode 100644 index 30511b8253a..00000000000 --- a/dbms/programs/clang/Compiler-6.0.0/driver.cpp +++ /dev/null @@ -1,520 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. - if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); - -static void insertTargetAndModeArgs(const ParsedClangName &NameParts, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - // Put target and mode arguments at the start of argument list so that - // arguments specified in command line could override them. Avoid putting - // them at index 0, as an option like '-cc1' must remain the first. - auto InsertionPoint = ArgVector.begin(); - if (InsertionPoint != ArgVector.end()) - ++InsertionPoint; - - if (NameParts.DriverMode) { - // Add the mode flag to the arguments. 
- ArgVector.insert(InsertionPoint, - GetStableCStr(SavedStrings, NameParts.DriverMode)); - } - - if (NameParts.TargetIsValid) { - const char *arr[] = {"-target", GetStableCStr(SavedStrings, - NameParts.TargetPrefix)}; - ArgVector.insert(InsertionPoint, std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, char **argv_) { - llvm::sys::PrintStackTraceOnErrorSignal(argv_[0]); - llvm::PrettyStackTraceProgram X(argc_, argv_); - llvm::llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - SmallVector argv; - llvm::SpecificBumpPtrAllocator ArgAllocator; - std::error_code EC = llvm::sys::Process::GetArgumentVector( - argv, llvm::makeArrayRef(argv_, argc_), ArgAllocator); - if (EC) { - llvm::errs() << "error: couldn't get arguments: " << EC.message() << '\n'; - return 1; - } - - llvm::InitializeAllTargets(); - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(argv[0]); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (StringRef(TargetAndMode.DriverMode).equals("--driver-mode=cl") || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - TheDriver.setTargetAndMode(TargetAndMode); - - insertTargetAndModeArgs(TargetAndMode, argv, SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. 
-      // On Windows, abort will return an exit code of 3. In these cases,
-      // generate additional diagnostic information if possible.
-      bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
-#ifdef LLVM_ON_WIN32
-      DiagnoseCrash |= CommandRes == 3;
-#endif
-      if (DiagnoseCrash) {
-        TheDriver.generateCompilationDiagnostics(*C, *FailingCommand);
-        break;
-      }
-    }
-  }
-
-  Diags.getClient()->finish();
-
-  // If any timers were active but haven't been destroyed yet, print their
-  // results now. This happens in -disable-free mode.
-  llvm::TimerGroup::printAll(llvm::errs());
-
-#ifdef LLVM_ON_WIN32
-  // Exit status should not be negative on Win32, unless abnormal termination.
-  // Once abnormal termiation was caught, negative status should not be
-  // propagated.
-  if (Res < 0)
-    Res = 1;
-#endif
-
-  // If we have multiple failing commands, we return the result of the first
-  // failing command.
-  return Res;
-}
diff --git a/dbms/programs/clang/Compiler-6.0.0/lld.cpp b/dbms/programs/clang/Compiler-6.0.0/lld.cpp
deleted file mode 100644
index 696ff84dfe6..00000000000
--- a/dbms/programs/clang/Compiler-6.0.0/lld.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "lld/Common/Driver.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Signals.h"
-
-using namespace lld;
-using namespace llvm;
-using namespace llvm::sys;
-
-int mainEntryClickHouseLLD(int Argc, char **Argv)
-{
-    // Standard set up, so program fails gracefully.
-    sys::PrintStackTraceOnErrorSignal(Argv[0]);
-    PrettyStackTraceProgram StackPrinter(Argc, Argv);
-    llvm_shutdown_obj Shutdown;
-
-    std::vector Args(Argv, Argv + Argc);
-    return !elf::link(Args, true);
-}
diff --git a/dbms/programs/clang/Compiler-6.0.0svn b/dbms/programs/clang/Compiler-6.0.0svn
deleted file mode 120000
index 7eba9cc37d0..00000000000
--- a/dbms/programs/clang/Compiler-6.0.0svn
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-6.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/Compiler-6.0.1 b/dbms/programs/clang/Compiler-6.0.1
deleted file mode 120000
index 7eba9cc37d0..00000000000
--- a/dbms/programs/clang/Compiler-6.0.1
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-6.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt b/dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt
deleted file mode 100644
index a042c821ec4..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0/CMakeLists.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG)
-
-link_directories(${LLVM_LIBRARY_DIRS})
-
-add_library(clickhouse-compiler-lib
-    driver.cpp
-    cc1_main.cpp
-    cc1gen_reproducer_main.cpp
-    cc1as_main.cpp
-    lld.cpp)
-
-target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0)
-
-string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti
-
-llvm_libs_all(REQUIRED_LLVM_LIBRARIES)
-
-message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}")
-
-target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS})
-
-# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory.
-
-target_link_libraries(clickhouse-compiler-lib PRIVATE
-clangBasic clangCodeGen clangDriver
-clangFrontend
-clangFrontendTool
-clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend
-clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers
-clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic
-clangCrossTU clangIndex
-
-lldCOFF
-lldDriver
-lldELF
-lldMinGW
-lldMachO
-lldReaderWriter
-lldYAML
-lldCommon
-lldCore
-
-${REQUIRED_LLVM_LIBRARIES}
-
-PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads
-${MALLOC_LIBRARIES}
-${GLIBC_COMPATIBILITY_LIBRARIES}
-${MEMCPY_LIBRARIES}
-)
diff --git a/dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp b/dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp
deleted file mode 100644
index 214bfa72476..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0/cc1_main.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the entry point to the clang -cc1 functionality, which implements the
-// core compiler functionality along with a number of additional tools for
-// demonstration and testing purposes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Option/Arg.h"
-#include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
-#include "clang/Config/config.h"
-#include "clang/Basic/Stack.h"
-#include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Driver/Options.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/FrontendDiagnostic.h"
-#include "clang/Frontend/TextDiagnosticBuffer.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Frontend/Utils.h"
-#include "clang/FrontendTool/Utils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Config/llvm-config.h"
-#include "llvm/LinkAllPasses.h"
-#include "llvm/Option/ArgList.h"
-#include "llvm/Option/OptTable.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Signals.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include
-
-#ifdef CLANG_HAVE_RLIMITS
-#include
-#endif
-
-// have no .a version in packages
-#undef LINK_POLLY_INTO_TOOLS
-
-using namespace clang;
-using namespace llvm::opt;
-
-//===----------------------------------------------------------------------===//
-// Main driver
-//===----------------------------------------------------------------------===//
-
-static void LLVMErrorHandler(void *UserData, const std::string &Message,
-                             bool GenCrashDiag) {
-  DiagnosticsEngine &Diags = *static_cast(UserData);
-
-  Diags.Report(diag::err_fe_error_backend) << Message;
-
-  // Run the interrupt handlers to make sure any special cleanups get done, in
-  // particular that we remove files registered with RemoveFileOnSignal.
-  llvm::sys::RunInterruptHandlers();
-
-  // We cannot recover from llvm errors. When reporting a fatal error, exit
-  // with status 70 to generate crash diagnostics. For BSD systems this is
-  // defined as an internal software error. Otherwise, exit with status 1.
-  exit(GenCrashDiag ?
70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace() { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = DesiredStackSize - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && - rlim.rlim_cur < rlim_t(DesiredStackSize)) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || - rlim.rlim_max >= rlim_t(DesiredStackSize)) - rlim.rlim_cur = DesiredStackSize; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != DesiredStackSize) - return; - } - - // We should now have a stack of size at least DesiredStackSize. Ensure - // that we can actually use that much, if necessary. 
- ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. - auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp b/dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp deleted file mode 100644 index d93b1f5cb1d..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/cc1as_main.cpp +++ /dev/null @@ -1,572 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - -namespace { - -/// Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - std::map DebugPrefixMap; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - std::string SplitDwarfFile; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. 
- }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. - std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - auto ArgString = A->getAsString(Args); - std::string Nearest; - if (OptTbl->findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1) - Diags.Report(diag::err_drv_unknown_argument) << ArgString; - else - Diags.Report(diag::err_drv_unknown_argument_with_suggestion) - << ArgString << Nearest; - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) - Opts.DebugPrefixMap.insert(StringRef(Arg).split('=')); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(StringRef Path, DiagnosticsEngine &Diags, bool Binary) { - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Path != "-") - sys::RemoveFileOnSignal(Path); - - std::error_code EC; - auto Out = llvm::make_unique( - Path, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - std::unique_ptr FDOS = - getOutputStream(Opts.OutputPath, Diags, IsBinary); - if (!FDOS) - return true; - std::unique_ptr DwoOS; - if (!Opts.SplitDwarfFile.empty()) - DwoOS = getOutputStream(Opts.SplitDwarfFile, Diags, IsBinary); - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.DebugPrefixMap.empty()) - for (const auto &KV : Opts.DebugPrefixMap) - Ctx.addDebugPrefixMapEntry(KV.first, KV.second); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - - std::unique_ptr CE; - if (Opts.ShowEncoding) - CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - MCTargetOptions MCOptions; - std::unique_ptr MAB( - TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB), - Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - std::unique_ptr CE( - TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - MCTargetOptions MCOptions; - std::unique_ptr MAB( - TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - std::unique_ptr OW = - DwoOS ? MAB->createDwoObjectWriter(*Out, *DwoOS) - : MAB->createObjectWriter(*Out); - - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI, - Opts.RelaxAll, Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - // Assembly to object compilation should leverage assembly info. - Str->setUseAssemblerInfoForParsing(true); - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value = 1; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed) { - if (Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - if (!Opts.SplitDwarfFile.empty() && Opts.SplitDwarfFile != "-") - sys::fs::remove(Opts.SplitDwarfFile); - } - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. 
- IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. - AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0, - /*ShowAllAliases=*/false); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp b/dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp deleted file mode 100644 index a4c034d8d35..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/cc1gen_reproducer_main.cpp +++ /dev/null @@ -1,196 +0,0 @@ -//===-- cc1gen_reproducer_main.cpp - Clang reproducer generator ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1gen-reproducer functionality, which -// generates reproducers for invocations for clang-based tools. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/VirtualFileSystem.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" - -using namespace clang; - -namespace { - -struct UnsavedFileHash { - std::string Name; - std::string MD5; -}; - -struct ClangInvocationInfo { - std::string Toolchain; - std::string LibclangOperation; - std::string LibclangOptions; - std::vector Arguments; - std::vector InvocationArguments; - std::vector UnsavedFileHashes; - bool Dump = false; -}; - -} // end anonymous namespace - -LLVM_YAML_IS_SEQUENCE_VECTOR(UnsavedFileHash) - -namespace llvm { -namespace yaml { - -template <> struct MappingTraits { - static void mapping(IO &IO, UnsavedFileHash &Info) { - IO.mapRequired("name", Info.Name); - IO.mapRequired("md5", Info.MD5); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, ClangInvocationInfo &Info) { - IO.mapRequired("toolchain", Info.Toolchain); - IO.mapOptional("libclang.operation", Info.LibclangOperation); - IO.mapOptional("libclang.opts", Info.LibclangOptions); - IO.mapRequired("args", Info.Arguments); - IO.mapOptional("invocation-args", Info.InvocationArguments); - IO.mapOptional("unsaved_file_hashes", Info.UnsavedFileHashes); - } -}; - -} // end namespace yaml -} // end namespace llvm - -static std::string generateReproducerMetaInfo(const ClangInvocationInfo &Info) { - std::string Result; - llvm::raw_string_ostream OS(Result); - OS << '{'; - bool NeedComma = false; - auto EmitKey = [&](StringRef Key) { - if (NeedComma) - OS << ", "; - NeedComma = true; - OS << '"' << Key << "\": "; - }; - auto EmitStringKey = [&](StringRef Key, StringRef Value) { - if (Value.empty()) - return; - EmitKey(Key); - OS << '"' << Value << '"'; - }; - EmitStringKey("libclang.operation", Info.LibclangOperation); - EmitStringKey("libclang.opts", Info.LibclangOptions); - if (!Info.InvocationArguments.empty()) { - EmitKey("invocation-args"); - OS << '['; - for (const auto &Arg : llvm::enumerate(Info.InvocationArguments)) { - if (Arg.index()) - OS << ','; - OS << '"' << Arg.value() << '"'; - } - OS << ']'; - } - OS << '}'; - // FIXME: Compare unsaved file hashes and report mismatch in the reproducer. - if (Info.Dump) - llvm::outs() << "REPRODUCER METAINFO: " << OS.str() << "\n"; - return std::move(OS.str()); -} - -/// Generates a reproducer for a set of arguments from a specific invocation. 
-static llvm::Optional -generateReproducerForInvocationArguments(ArrayRef Argv, - const ClangInvocationInfo &Info) { - using namespace driver; - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(Argv[0]); - - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions; - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, new IgnoringDiagConsumer()); - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - Driver TheDriver(Argv[0], llvm::sys::getDefaultTargetTriple(), Diags); - TheDriver.setTargetAndMode(TargetAndMode); - - std::unique_ptr C(TheDriver.BuildCompilation(Argv)); - if (C && !C->containsError()) { - for (const auto &J : C->getJobs()) { - if (const Command *Cmd = dyn_cast(&J)) { - Driver::CompilationDiagnosticReport Report; - TheDriver.generateCompilationDiagnostics( - *C, *Cmd, generateReproducerMetaInfo(Info), &Report); - return Report; - } - } - } - - return None; -} - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes); - -static void printReproducerInformation( - llvm::raw_ostream &OS, const ClangInvocationInfo &Info, - const driver::Driver::CompilationDiagnosticReport &Report) { - OS << "REPRODUCER:\n"; - OS << "{\n"; - OS << R"("files":[)"; - for (const auto &File : llvm::enumerate(Report.TemporaryFiles)) { - if (File.index()) - OS << ','; - OS << '"' << File.value() << '"'; - } - OS << "]\n}\n"; -} - -int cc1gen_reproducer_main(ArrayRef Argv, const char *Argv0, - void *MainAddr) { - if (Argv.size() < 1) { - llvm::errs() << "error: missing invocation file\n"; - return 1; - } - // Parse the invocation descriptor. - StringRef Input = Argv[0]; - llvm::ErrorOr> Buffer = - llvm::MemoryBuffer::getFile(Input); - if (!Buffer) { - llvm::errs() << "error: failed to read " << Input << ": " - << Buffer.getError().message() << "\n"; - return 1; - } - llvm::yaml::Input YAML(Buffer.get()->getBuffer()); - ClangInvocationInfo InvocationInfo; - YAML >> InvocationInfo; - if (Argv.size() > 1 && Argv[1] == StringRef("-v")) - InvocationInfo.Dump = true; - - // Create an invocation that will produce the reproducer. - std::vector DriverArgs; - for (const auto &Arg : InvocationInfo.Arguments) - DriverArgs.push_back(Arg.c_str()); - std::string Path = GetExecutablePath(Argv0, /*CanonicalPrefixes=*/true); - DriverArgs[0] = Path.c_str(); - llvm::Optional Report = - generateReproducerForInvocationArguments(DriverArgs, InvocationInfo); - - // Emit the information about the reproduce files to stdout. - int Result = 1; - if (Report) { - printReproducerInformation(llvm::outs(), InvocationInfo, *Report); - Result = 0; - } - - // Remove the input file. - llvm::sys::fs::remove(Input); - return Result; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/driver.cpp b/dbms/programs/clang/Compiler-7.0.0/driver.cpp deleted file mode 100644 index 79d71b08ba7..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/driver.cpp +++ /dev/null @@ -1,514 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Driver/Driver.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. - if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1gen_reproducer_main(ArrayRef Argv, - const char *Argv0, void *MainAddr); - -static void insertTargetAndModeArgs(const ParsedClangName &NameParts, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - // Put target and mode arguments at the start of argument list so that - // arguments specified in command line could override them. Avoid putting - // them at index 0, as an option like '-cc1' must remain the first. - int InsertionPoint = 0; - if (ArgVector.size() > 0) - ++InsertionPoint; - - if (NameParts.DriverMode) { - // Add the mode flag to the arguments. 
- ArgVector.insert(ArgVector.begin() + InsertionPoint, - GetStableCStr(SavedStrings, NameParts.DriverMode)); - } - - if (NameParts.TargetIsValid) { - const char *arr[] = {"-target", GetStableCStr(SavedStrings, - NameParts.TargetPrefix)}; - ArgVector.insert(ArgVector.begin() + InsertionPoint, - std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "gen-reproducer") - return cc1gen_reproducer_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'. " - << "Valid tools include '-cc1' and '-cc1as'.\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, /* const */ char **argv_) { - llvm::InitLLVM X(argc_, argv_); - SmallVector argv(argv_, argv_ + argc_); - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - llvm::InitializeAllTargets(); - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(argv[0]); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (StringRef(TargetAndMode.DriverMode).equals("--driver-mode=cl") || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - TheDriver.setTargetAndMode(TargetAndMode); - - insertTargetAndModeArgs(TargetAndMode, argv, SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. 
- // On Windows, abort will return an exit code of 3. In these cases, - // generate additional diagnostic information if possible. - bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70; -#ifdef _WIN32 - DiagnoseCrash |= CommandRes == 3; -#endif - if (DiagnoseCrash) { - TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); - break; - } - } - } - - Diags.getClient()->finish(); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - -#ifdef _WIN32 - // Exit status should not be negative on Win32, unless abnormal termination. - // Once abnormal termiation was caught, negative status should not be - // propagated. - if (Res < 0) - Res = 1; -#endif - - // If we have multiple failing commands, we return the result of the first - // failing command. - return Res; -} diff --git a/dbms/programs/clang/Compiler-7.0.0/lld.cpp b/dbms/programs/clang/Compiler-7.0.0/lld.cpp deleted file mode 100644 index 8e118b6e24b..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0/lld.cpp +++ /dev/null @@ -1,150 +0,0 @@ -//===- tools/lld/lld.cpp - Linker Driver Dispatcher -----------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the main function of the lld executable. The main -// function is a thin wrapper which dispatches to the platform specific -// driver. -// -// lld is a single executable that contains four different linkers for ELF, -// COFF, WebAssembly and Mach-O. The main function dispatches according to -// argv[0] (i.e. command name). The most common name for each target is shown -// below: -// -// - ld.lld: ELF (Unix) -// - ld64: Mach-O (macOS) -// - lld-link: COFF (Windows) -// - ld-wasm: WebAssembly -// -// lld can be invoked as "lld" along with "-flavor" option. This is for -// backward compatibility and not recommended. -// -//===----------------------------------------------------------------------===// - -#include "lld/Common/Driver.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Path.h" -#include -using namespace lld; -using namespace llvm; -using namespace llvm::sys; - -/* - -enum Flavor { - Invalid, - Gnu, // -flavor gnu - WinLink, // -flavor link - Darwin, // -flavor darwin - Wasm, // -flavor wasm -}; - -LLVM_ATTRIBUTE_NORETURN static void die(const Twine &S) { - errs() << S << "\n"; - exit(1); -} - -static Flavor getFlavor(StringRef S) { - return StringSwitch(S) - .CasesLower("ld", "ld.lld", "gnu", Gnu) - .CasesLower("wasm", "ld-wasm", Wasm) - .CaseLower("link", WinLink) - .CasesLower("ld64", "ld64.lld", "darwin", Darwin) - .Default(Invalid); -} - -static bool isPETarget(const std::vector &V) { - for (auto It = V.begin(); It + 1 != V.end(); ++It) { - if (StringRef(*It) != "-m") - continue; - StringRef S = *(It + 1); - return S == "i386pe" || S == "i386pep" || S == "thumb2pe" || S == "arm64pe"; - } - return false; -} - -static Flavor parseProgname(StringRef Progname) { -#if __APPLE__ - // Use Darwin driver for "ld" on Darwin. - if (Progname == "ld") - return Darwin; -#endif - -#if LLVM_ON_UNIX - // Use GNU driver for "ld" on other Unix-like system. 
- if (Progname == "ld") - return Gnu; -#endif - - // Progname may be something like "lld-gnu". Parse it. - SmallVector V; - Progname.split(V, "-"); - for (StringRef S : V) - if (Flavor F = getFlavor(S)) - return F; - return Invalid; -} - -static Flavor parseFlavor(std::vector &V) { - // Parse -flavor option. - if (V.size() > 1 && V[1] == StringRef("-flavor")) { - if (V.size() <= 2) - die("missing arg value for '-flavor'"); - Flavor F = getFlavor(V[2]); - if (F == Invalid) - die("Unknown flavor: " + StringRef(V[2])); - V.erase(V.begin() + 1, V.begin() + 3); - return F; - } - - // Deduct the flavor from argv[0]. - StringRef Arg0 = path::filename(V[0]); - if (Arg0.endswith_lower(".exe")) - Arg0 = Arg0.drop_back(4); - return parseProgname(Arg0); -} -*/ - -// If this function returns true, lld calls _exit() so that it quickly -// exits without invoking destructors of globally allocated objects. -// -// We don't want to do that if we are running tests though, because -// doing that breaks leak sanitizer. So, lit sets this environment variable, -// and we use it to detect whether we are running tests or not. -static bool canExitEarly() { return StringRef(getenv("LLD_IN_TEST")) != "1"; } - -/// Universal linker main(). This linker emulates the gnu, darwin, or -/// windows linker based on the argv[0] or -flavor option. -int mainEntryClickHouseLLD(int Argc, /* const */ char **Argv) { - InitLLVM X(Argc, Argv); - - std::vector Args(Argv, Argv + Argc); -/* - switch (parseFlavor(Args)) { - case Gnu: - if (isPETarget(Args)) - return !mingw::link(Args); -*/ - return !elf::link(Args, canExitEarly()); -/* - case WinLink: - return !coff::link(Args, canExitEarly()); - case Darwin: - return !mach_o::link(Args, canExitEarly()); - case Wasm: - return !wasm::link(Args, canExitEarly()); - default: - die("lld is a generic driver.\n" - "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-lld" - " (WebAssembly) instead"); - } -*/ -} diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt b/dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt deleted file mode 100644 index a5f8314b862..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG) - -link_directories(${LLVM_LIBRARY_DIRS}) - -add_library(clickhouse-compiler-lib - driver.cpp - cc1_main.cpp - cc1as_main.cpp - lld.cpp) - -target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) # cant compile with -fno-rtti - -llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - -message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") - -target_include_directories(clickhouse-compiler-lib SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) - -# This is extracted almost directly from CMakeFiles/.../link.txt in LLVM build directory. 
- -target_link_libraries(clickhouse-compiler-lib PRIVATE - -clangBasic clangCodeGen clangDriver -clangFrontend -clangFrontendTool -clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend -clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers -clangASTMatchers clangStaticAnalyzerCore clangAnalysis clangAST clangRewrite clangLex clangBasic -clangCrossTU clangIndex - -lldCOFF -lldDriver -lldELF -lldMinGW -lldMachO -lldReaderWriter -lldYAML -lldCommon -lldCore - -${REQUIRED_LLVM_LIBRARIES} - -PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads -${MALLOC_LIBRARIES} -${GLIBC_COMPATIBILITY_LIBRARIES} -${MEMCPY_LIBRARIES} -) diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp deleted file mode 100644 index 3686475dd42..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/cc1_main.cpp +++ /dev/null @@ -1,243 +0,0 @@ -//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1 functionality, which implements the -// core compiler functionality along with a number of additional tools for -// demonstration and testing purposes. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Option/Arg.h" -#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" -#include "clang/Config/config.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticBuffer.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/FrontendTool/Utils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/LinkAllPasses.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include - -#ifdef CLANG_HAVE_RLIMITS -#include -#endif - -// have no .a version in packages -#undef LINK_POLLY_INTO_TOOLS - -using namespace clang; -using namespace llvm::opt; - -//===----------------------------------------------------------------------===// -// Main driver -//===----------------------------------------------------------------------===// - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool GenCrashDiag) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // Run the interrupt handlers to make sure any special cleanups get done, in - // particular that we remove files registered with RemoveFileOnSignal. - llvm::sys::RunInterruptHandlers(); - - // We cannot recover from llvm errors. When reporting a fatal error, exit - // with status 70 to generate crash diagnostics. For BSD systems this is - // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 
70 : 1); -} - -#ifdef LINK_POLLY_INTO_TOOLS -namespace polly { -void initializePollyPasses(llvm::PassRegistry &Registry); -} -#endif - -#ifdef CLANG_HAVE_RLIMITS -// The amount of stack we think is "sufficient". If less than this much is -// available, we may be unable to reach our template instantiation depth -// limit and other similar limits. -// FIXME: Unify this with the stack we request when spawning a thread to build -// a module. -static const int kSufficientStack = 8 << 20; - -#if defined(__linux__) && defined(__PIE__) -static size_t getCurrentStackAllocation() { - // If we can't compute the current stack usage, allow for 512K of command - // line arguments and environment. - size_t Usage = 512 * 1024; - if (FILE *StatFile = fopen("/proc/self/stat", "r")) { - // We assume that the stack extends from its current address to the end of - // the environment space. In reality, there is another string literal (the - // program name) after the environment, but this is close enough (we only - // need to be within 100K or so). - unsigned long StackPtr, EnvEnd; - // Disable silly GCC -Wformat warning that complains about length - // modifiers on ignored format specifiers. We want to retain these - // for documentation purposes even though they have no effect. -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat" -#endif - if (fscanf(StatFile, - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*lu %*lu %*lu %*lu %*lu " - "%*lu %*ld %*ld %*ld %*ld %*ld %*ld %*llu %*lu %*ld %*lu %*lu " - "%*lu %*lu %lu %*lu %*lu %*lu %*lu %*lu %*llu %*lu %*lu %*d %*d " - "%*u %*u %*llu %*lu %*ld %*lu %*lu %*lu %*lu %*lu %*lu %lu %*d", - &StackPtr, &EnvEnd) == 2) { -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - Usage = StackPtr < EnvEnd ? EnvEnd - StackPtr : StackPtr - EnvEnd; - } - fclose(StatFile); - } - return Usage; -} - -#include - -LLVM_ATTRIBUTE_NOINLINE -static void ensureStackAddressSpace(int ExtraChunks = 0) { - // Linux kernels prior to 4.1 will sometimes locate the heap of a PIE binary - // relatively close to the stack (they are only guaranteed to be 128MiB - // apart). This results in crashes if we happen to heap-allocate more than - // 128MiB before we reach our stack high-water mark. - // - // To avoid these crashes, ensure that we have sufficient virtual memory - // pages allocated before we start running. - size_t Curr = getCurrentStackAllocation(); - const int kTargetStack = kSufficientStack - 256 * 1024; - if (Curr < kTargetStack) { - volatile char *volatile Alloc = - static_cast(alloca(kTargetStack - Curr)); - Alloc[0] = 0; - Alloc[kTargetStack - Curr - 1] = 0; - } -} -#else -static void ensureStackAddressSpace() {} -#endif - -/// Attempt to ensure that we have at least 8MiB of usable stack space. -static void ensureSufficientStack() { - struct rlimit rlim; - if (getrlimit(RLIMIT_STACK, &rlim) != 0) - return; - - // Increase the soft stack limit to our desired level, if necessary and - // possible. - if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < kSufficientStack) { - // Try to allocate sufficient stack. - if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max >= kSufficientStack) - rlim.rlim_cur = kSufficientStack; - else if (rlim.rlim_cur == rlim.rlim_max) - return; - else - rlim.rlim_cur = rlim.rlim_max; - - if (setrlimit(RLIMIT_STACK, &rlim) != 0 || - rlim.rlim_cur != kSufficientStack) - return; - } - - // We should now have a stack of size at least kSufficientStack. 
Ensure - // that we can actually use that much, if necessary. - ensureStackAddressSpace(); -} -#else -static void ensureSufficientStack() {} -#endif - -int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { - ensureSufficientStack(); - - std::unique_ptr Clang(new CompilerInstance()); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Register the support for object-file-wrapped Clang modules. - auto PCHOps = Clang->getPCHContainerOperations(); - PCHOps->registerWriter(llvm::make_unique()); - PCHOps->registerReader(llvm::make_unique()); - - // Initialize targets first, so that --version shows registered targets. - llvm::InitializeAllTargets(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmPrinters(); - llvm::InitializeAllAsmParsers(); - -#ifdef LINK_POLLY_INTO_TOOLS - llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry(); - polly::initializePollyPasses(Registry); -#endif - - // Buffer diagnostics from argument parsing so that we can output them using a - // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); - bool Success = CompilerInvocation::CreateFromArgs( - Clang->getInvocation(), Argv.begin(), Argv.end(), Diags); - - // Infer the builtin include path if unspecified. - if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && - Clang->getHeaderSearchOpts().ResourceDir.empty()) - Clang->getHeaderSearchOpts().ResourceDir = - CompilerInvocation::GetResourcesPath(Argv0, MainAddr); - - // Create the actual diagnostics engine. - Clang->createDiagnostics(); - if (!Clang->hasDiagnostics()) - return 1; - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. - llvm::install_fatal_error_handler(LLVMErrorHandler, - static_cast(&Clang->getDiagnostics())); - - DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); - if (!Success) - return 1; - - // Execute the frontend actions. - Success = ExecuteCompilerInvocation(Clang.get()); - - // If any timers were active but haven't been destroyed yet, print their - // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); - - // Our error handler depends on the Diagnostics object, which we're - // potentially about to delete. Uninstall the handler now so that any - // later errors use the default handling behavior instead. - llvm::remove_fatal_error_handler(); - - // When running with -disable-free, don't do any destruction or shutdown. - if (Clang->getFrontendOpts().DisableFree) { - BuryPointer(std::move(Clang)); - return !Success; - } - - return !Success; -} diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp deleted file mode 100644 index ce23422077f..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/cc1as_main.cpp +++ /dev/null @@ -1,555 +0,0 @@ -//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang -cc1as functionality, which implements -// the direct interface to the LLVM MC based assembler. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/FrontendDiagnostic.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace llvm; -using namespace llvm::opt; - - -namespace { - -/// \brief Helper class for representing a single invocation of the assembler. -struct AssemblerInvocation { - /// @name Target Options - /// @{ - - /// The name of the target triple to assemble for. - std::string Triple; - - /// If given, the name of the target CPU to determine which instructions - /// are legal. - std::string CPU; - - /// The list of target specific features to enable or disable -- this should - /// be a list of strings starting with '+' or '-'. - std::vector Features; - - /// The list of symbol definitions. - std::vector SymbolDefs; - - /// @} - /// @name Language Options - /// @{ - - std::vector IncludePaths; - unsigned NoInitialTextSection : 1; - unsigned SaveTemporaryLabels : 1; - unsigned GenDwarfForAssembly : 1; - unsigned RelaxELFRelocations : 1; - unsigned DwarfVersion; - std::string DwarfDebugFlags; - std::string DwarfDebugProducer; - std::string DebugCompilationDir; - llvm::DebugCompressionType CompressDebugSections = - llvm::DebugCompressionType::None; - std::string MainFileName; - - /// @} - /// @name Frontend Options - /// @{ - - std::string InputFile; - std::vector LLVMArgs; - std::string OutputPath; - enum FileType { - FT_Asm, ///< Assembly (.s) output, transliterate mode. - FT_Null, ///< No output, for timing purposes. - FT_Obj ///< Object file output. - }; - FileType OutputType; - unsigned ShowHelp : 1; - unsigned ShowVersion : 1; - - /// @} - /// @name Transliterate Options - /// @{ - - unsigned OutputAsmVariant; - unsigned ShowEncoding : 1; - unsigned ShowInst : 1; - - /// @} - /// @name Assembler Options - /// @{ - - unsigned RelaxAll : 1; - unsigned NoExecStack : 1; - unsigned FatalWarnings : 1; - unsigned IncrementalLinkerCompatible : 1; - - /// The name of the relocation model to use. 
- std::string RelocationModel; - - /// @} - -public: - AssemblerInvocation() { - Triple = ""; - NoInitialTextSection = 0; - InputFile = "-"; - OutputPath = "-"; - OutputType = FT_Asm; - OutputAsmVariant = 0; - ShowInst = 0; - ShowEncoding = 0; - RelaxAll = 0; - NoExecStack = 0; - FatalWarnings = 0; - IncrementalLinkerCompatible = 0; - DwarfVersion = 0; - } - - static bool CreateFromArgs(AssemblerInvocation &Res, - ArrayRef Argv, - DiagnosticsEngine &Diags); -}; - -} - -bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, - ArrayRef Argv, - DiagnosticsEngine &Diags) { - bool Success = true; - - // Parse the arguments. - std::unique_ptr OptTbl(createDriverOptTable()); - - const unsigned IncludedFlagsBitmask = options::CC1AsOption; - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount, - IncludedFlagsBitmask); - - // Check for missing argument error. - if (MissingArgCount) { - Diags.Report(diag::err_drv_missing_argument) - << Args.getArgString(MissingArgIndex) << MissingArgCount; - Success = false; - } - - // Issue errors on unknown arguments. - for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { - auto ArgString = A->getAsString(Args); - std::string Nearest; - if (OptTbl->findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1) - Diags.Report(diag::err_drv_unknown_argument) << ArgString; - else - Diags.Report(diag::err_drv_unknown_argument_with_suggestion) - << ArgString << Nearest; - Success = false; - } - - // Construct the invocation. - - // Target Options - Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); - Opts.CPU = Args.getLastArgValue(OPT_target_cpu); - Opts.Features = Args.getAllArgValues(OPT_target_feature); - - // Use the default target triple if unspecified. - if (Opts.Triple.empty()) - Opts.Triple = llvm::sys::getDefaultTargetTriple(); - - // Language Options - Opts.IncludePaths = Args.getAllArgValues(OPT_I); - Opts.NoInitialTextSection = Args.hasArg(OPT_n); - Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); - // Any DebugInfoKind implies GenDwarfForAssembly. 
- Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); - - if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections, - OPT_compress_debug_sections_EQ)) { - if (A->getOption().getID() == OPT_compress_debug_sections) { - // TODO: be more clever about the compression type auto-detection - Opts.CompressDebugSections = llvm::DebugCompressionType::GNU; - } else { - Opts.CompressDebugSections = - llvm::StringSwitch(A->getValue()) - .Case("none", llvm::DebugCompressionType::None) - .Case("zlib", llvm::DebugCompressionType::Z) - .Case("zlib-gnu", llvm::DebugCompressionType::GNU) - .Default(llvm::DebugCompressionType::None); - } - } - - Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); - Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); - Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer); - Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name); - - // Frontend Options - if (Args.hasArg(OPT_INPUT)) { - bool First = true; - for (const Arg *A : Args.filtered(OPT_INPUT)) { - if (First) { - Opts.InputFile = A->getValue(); - First = false; - } else { - Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); - Success = false; - } - } - } - Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); - Opts.OutputPath = Args.getLastArgValue(OPT_o); - if (Arg *A = Args.getLastArg(OPT_filetype)) { - StringRef Name = A->getValue(); - unsigned OutputType = StringSwitch(Name) - .Case("asm", FT_Asm) - .Case("null", FT_Null) - .Case("obj", FT_Obj) - .Default(~0U); - if (OutputType == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; - Success = false; - } else - Opts.OutputType = FileType(OutputType); - } - Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowVersion = Args.hasArg(OPT_version); - - // Transliterate Options - Opts.OutputAsmVariant = - getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); - Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); - Opts.ShowInst = Args.hasArg(OPT_show_inst); - - // Assemble Options - Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); - Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); - Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); - Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); - Opts.IncrementalLinkerCompatible = - Args.hasArg(OPT_mincremental_linker_compatible); - Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); - - return Success; -} - -static std::unique_ptr -getOutputStream(AssemblerInvocation &Opts, DiagnosticsEngine &Diags, - bool Binary) { - if (Opts.OutputPath.empty()) - Opts.OutputPath = "-"; - - // Make sure that the Out file gets unlinked from the disk if we get a - // SIGINT. - if (Opts.OutputPath != "-") - sys::RemoveFileOnSignal(Opts.OutputPath); - - std::error_code EC; - auto Out = llvm::make_unique( - Opts.OutputPath, EC, (Binary ? sys::fs::F_None : sys::fs::F_Text)); - if (EC) { - Diags.Report(diag::err_fe_unable_to_open_output) << Opts.OutputPath - << EC.message(); - return nullptr; - } - - return Out; -} - -static bool ExecuteAssembler(AssemblerInvocation &Opts, - DiagnosticsEngine &Diags) { - // Get the target specific parser. 
- std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); - if (!TheTarget) - return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(Opts.InputFile); - - if (std::error_code EC = Buffer.getError()) { - Error = EC.message(); - return Diags.Report(diag::err_fe_error_reading) << Opts.InputFile; - } - - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); - - // Record the location of the include directories so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(Opts.IncludePaths); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, Opts.Triple)); - assert(MAI && "Unable to create target asm info!"); - - // Ensure MCAsmInfo initialization occurs before any use, otherwise sections - // may be created with a combination of default and explicit settings. - MAI->setCompressDebugSections(Opts.CompressDebugSections); - - MAI->setRelaxELFRelocations(Opts.RelaxELFRelocations); - - bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; - std::unique_ptr FDOS = getOutputStream(Opts, Diags, IsBinary); - if (!FDOS) - return true; - - // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and - // MCObjectFileInfo needs a MCContext reference in order to initialize itself. - std::unique_ptr MOFI(new MCObjectFileInfo()); - - MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr); - - bool PIC = false; - if (Opts.RelocationModel == "static") { - PIC = false; - } else if (Opts.RelocationModel == "pic") { - PIC = true; - } else { - assert(Opts.RelocationModel == "dynamic-no-pic" && - "Invalid PIC model!"); - PIC = false; - } - - MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), PIC, Ctx); - if (Opts.SaveTemporaryLabels) - Ctx.setAllowTemporaryLabels(false); - if (Opts.GenDwarfForAssembly) - Ctx.setGenDwarfForAssembly(true); - if (!Opts.DwarfDebugFlags.empty()) - Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); - if (!Opts.DwarfDebugProducer.empty()) - Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); - if (!Opts.DebugCompilationDir.empty()) - Ctx.setCompilationDir(Opts.DebugCompilationDir); - if (!Opts.MainFileName.empty()) - Ctx.setMainFileName(StringRef(Opts.MainFileName)); - Ctx.setDwarfVersion(Opts.DwarfVersion); - - // Build up the feature string from the target feature list. - std::string FS; - if (!Opts.Features.empty()) { - FS = Opts.Features[0]; - for (unsigned i = 1, e = Opts.Features.size(); i != e; ++i) - FS += "," + Opts.Features[i]; - } - - std::unique_ptr Str; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); - - raw_pwrite_stream *Out = FDOS.get(); - std::unique_ptr BOS; - - // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
- if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); - - std::unique_ptr CE; - if (Opts.ShowEncoding) - CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - MCTargetOptions MCOptions; - std::unique_ptr MAB( - TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - - auto FOut = llvm::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer( - Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB), - Opts.ShowInst)); - } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { - Str.reset(createNullStreamer(Ctx)); - } else { - assert(Opts.OutputType == AssemblerInvocation::FT_Obj && - "Invalid file type!"); - if (!FDOS->supportsSeeking()) { - BOS = make_unique(*FDOS); - Out = BOS.get(); - } - - std::unique_ptr CE( - TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - MCTargetOptions MCOptions; - std::unique_ptr MAB( - TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - - Triple T(Opts.Triple); - Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, std::move(MAB), *Out, std::move(CE), *STI, Opts.RelaxAll, - Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); - Str.get()->InitSections(Opts.NoExecStack); - } - - // Assembly to object compilation should leverage assembly info. - Str->setUseAssemblerInfoForParsing(true); - - bool Failed = false; - - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); - - // FIXME: init MCTargetOptions from sanitizer flags here. - MCTargetOptions Options; - std::unique_ptr TAP( - TheTarget->createMCAsmParser(*STI, *Parser, *MCII, Options)); - if (!TAP) - Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; - - // Set values for symbols, if any. - for (auto &S : Opts.SymbolDefs) { - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto Val = Pair.second; - int64_t Value; - // We have already error checked this in the driver. - Val.getAsInteger(0, Value); - Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); - } - - if (!Failed) { - Parser->setTargetParser(*TAP.get()); - Failed = Parser->Run(Opts.NoInitialTextSection); - } - - // Close Streamer first. - // It might have a reference to the output stream. - Str.reset(); - // Close the output stream early. - BOS.reset(); - FDOS.reset(); - - // Delete output file if there were errors. - if (Failed && Opts.OutputPath != "-") - sys::fs::remove(Opts.OutputPath); - - return Failed; -} - -static void LLVMErrorHandler(void *UserData, const std::string &Message, - bool /*GenCrashDiag*/) { - DiagnosticsEngine &Diags = *static_cast(UserData); - - Diags.Report(diag::err_fe_error_backend) << Message; - - // We cannot recover from llvm errors. - exit(1); -} - -int cc1as_main(ArrayRef Argv, const char */*Argv0*/, void */*MainAddr*/) { - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - - // Construct our diagnostic client. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); - DiagClient->setPrefix("clang -cc1as"); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - // Set an error handler, so that any LLVM backend diagnostics go through our - // error handler. 
- ScopedFatalErrorHandler FatalErrorHandler - (LLVMErrorHandler, static_cast(&Diags)); - - // Parse the arguments. - AssemblerInvocation Asm; - if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) - return 1; - - if (Asm.ShowHelp) { - std::unique_ptr Opts(driver::createDriverOptTable()); - Opts->PrintHelp(llvm::outs(), "clang -cc1as", "Clang Integrated Assembler", - /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0, - /*ShowAllAliases=*/false); - return 0; - } - - // Honor -version. - // - // FIXME: Use a better -version message? - if (Asm.ShowVersion) { - llvm::cl::PrintVersionMessage(); - return 0; - } - - // Honor -mllvm. - // - // FIXME: Remove this, one day. - if (!Asm.LLVMArgs.empty()) { - unsigned NumArgs = Asm.LLVMArgs.size(); - auto Args = llvm::make_unique(NumArgs + 2); - Args[0] = "clang (LLVM option parsing)"; - for (unsigned i = 0; i != NumArgs; ++i) - Args[i + 1] = Asm.LLVMArgs[i].c_str(); - Args[NumArgs + 1] = nullptr; - llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); - } - - // Execute the invocation, unless there were parsing errors. - bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); - - // If any timers were active but haven't been destroyed yet, print their - // results now. - TimerGroup::printAll(errs()); - - return !!Failed; -} diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp deleted file mode 100644 index 9a061b9d137..00000000000 --- a/dbms/programs/clang/Compiler-7.0.0bundled/driver.cpp +++ /dev/null @@ -1,512 +0,0 @@ -//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the entry point to the clang driver; it is a thin wrapper -// for functionality in the Driver clang library. 
-// -//===----------------------------------------------------------------------===// - -#include "clang/Driver/Driver.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/ToolChain.h" -#include "clang/Frontend/ChainedDiagnosticConsumer.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/SerializedDiagnosticPrinter.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -using namespace clang; -using namespace clang::driver; -using namespace llvm::opt; - -std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { - if (!CanonicalPrefixes) { - SmallString<128> ExecutablePath(Argv0); - // Do a PATH lookup if Argv0 isn't a valid path. - if (!llvm::sys::fs::exists(ExecutablePath)) - if (llvm::ErrorOr P = - llvm::sys::findProgramByName(ExecutablePath)) - ExecutablePath = *P; - return ExecutablePath.str(); - } - - // This just needs to be some symbol in the binary; C++ doesn't - // allow taking the address of ::main however. - void *P = (void*) (intptr_t) GetExecutablePath; - return llvm::sys::fs::getMainExecutable(Argv0, P); -} - -static const char *GetStableCStr(std::set &SavedStrings, - StringRef S) { - return SavedStrings.insert(S).first->c_str(); -} - -/// ApplyQAOverride - Apply a list of edits to the input argument lists. -/// -/// The input string is a space separate list of edits to perform, -/// they are applied in order to the input argument lists. Edits -/// should be one of the following forms: -/// -/// '#': Silence information about the changes to the command line arguments. -/// -/// '^': Add FOO as a new argument at the beginning of the command line. -/// -/// '+': Add FOO as a new argument at the end of the command line. -/// -/// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command -/// line. -/// -/// 'xOPTION': Removes all instances of the literal argument OPTION. -/// -/// 'XOPTION': Removes all instances of the literal argument OPTION, -/// and the following argument. -/// -/// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' -/// at the end of the command line. -/// -/// \param OS - The stream to write edit information to. -/// \param Args - The vector of command line arguments. -/// \param Edit - The override command to perform. -/// \param SavedStrings - Set to use for storing string representations. -static void ApplyOneQAOverride(raw_ostream &OS, - SmallVectorImpl &Args, - StringRef Edit, - std::set &SavedStrings) { - // This does not need to be efficient. 
- - if (Edit[0] == '^') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at beginning\n"; - Args.insert(Args.begin() + 1, Str); - } else if (Edit[0] == '+') { - const char *Str = - GetStableCStr(SavedStrings, Edit.substr(1)); - OS << "### Adding argument " << Str << " at end\n"; - Args.push_back(Str); - } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { - StringRef MatchPattern = Edit.substr(2).split('/').first; - StringRef ReplPattern = Edit.substr(2).split('/').second; - ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); - - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - // Ignore end-of-line response file markers - if (Args[i] == nullptr) - continue; - std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); - - if (Repl != Args[i]) { - OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; - Args[i] = GetStableCStr(SavedStrings, Repl); - } - } - } else if (Edit[0] == 'x' || Edit[0] == 'X') { - auto Option = Edit.substr(1); - for (unsigned i = 1; i < Args.size();) { - if (Option == Args[i]) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - if (Edit[0] == 'X') { - if (i < Args.size()) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - OS << "### Invalid X edit, end of command line!\n"; - } - } else - ++i; - } - } else if (Edit[0] == 'O') { - for (unsigned i = 1; i < Args.size();) { - const char *A = Args[i]; - // Ignore end-of-line response file markers - if (A == nullptr) - continue; - if (A[0] == '-' && A[1] == 'O' && - (A[2] == '\0' || - (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || - ('0' <= A[2] && A[2] <= '9'))))) { - OS << "### Deleting argument " << Args[i] << '\n'; - Args.erase(Args.begin() + i); - } else - ++i; - } - OS << "### Adding argument " << Edit << " at end\n"; - Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); - } else { - OS << "### Unrecognized edit: " << Edit << "\n"; - } -} - -/// ApplyQAOverride - Apply a comma separate list of edits to the -/// input argument lists. See ApplyOneQAOverride. -static void ApplyQAOverride(SmallVectorImpl &Args, - const char *OverrideStr, - std::set &SavedStrings) { - raw_ostream *OS = &llvm::errs(); - - if (OverrideStr[0] == '#') { - ++OverrideStr; - OS = &llvm::nulls(); - } - - *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; - - // This does not need to be efficient. - - const char *S = OverrideStr; - while (*S) { - const char *End = ::strchr(S, ' '); - if (!End) - End = S + strlen(S); - if (End != S) - ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); - S = End; - if (*S != '\0') - ++S; - } -} - -extern int cc1_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1as_main(ArrayRef Argv, const char *Argv0, - void *MainAddr); -extern int cc1gen_reproducer_main(ArrayRef Argv, - const char *Argv0, void *MainAddr); - -static void insertTargetAndModeArgs(const ParsedClangName &NameParts, - SmallVectorImpl &ArgVector, - std::set &SavedStrings) { - // Put target and mode arguments at the start of argument list so that - // arguments specified in command line could override them. Avoid putting - // them at index 0, as an option like '-cc1' must remain the first. - int InsertionPoint = 0; - if (ArgVector.size() > 0) - ++InsertionPoint; - - if (NameParts.DriverMode) { - // Add the mode flag to the arguments. 
- ArgVector.insert(ArgVector.begin() + InsertionPoint, - GetStableCStr(SavedStrings, NameParts.DriverMode)); - } - - if (NameParts.TargetIsValid) { - const char *arr[] = {"-target", GetStableCStr(SavedStrings, - NameParts.TargetPrefix)}; - ArgVector.insert(ArgVector.begin() + InsertionPoint, - std::begin(arr), std::end(arr)); - } -} - -static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, - SmallVectorImpl &Opts) { - llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); - // The first instance of '#' should be replaced with '=' in each option. - for (const char *Opt : Opts) - if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) - *NumberSignPtr = '='; -} - -static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { - // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. - TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); - if (TheDriver.CCPrintOptions) - TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); - - // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. - TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); - if (TheDriver.CCPrintHeaders) - TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); - - // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. - TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); - if (TheDriver.CCLogDiagnostics) - TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); -} - -static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, - const std::string &Path) { - // If the clang binary happens to be named cl.exe for compatibility reasons, - // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. - StringRef ExeBasename(llvm::sys::path::filename(Path)); - if (ExeBasename.equals_lower("cl.exe")) - ExeBasename = "clang-cl.exe"; - DiagClient->setPrefix(ExeBasename); -} - -// This lets us create the DiagnosticsEngine with a properly-filled-out -// DiagnosticOptions instance. -static DiagnosticOptions * -CreateAndPopulateDiagOpts(ArrayRef argv) { - auto *DiagOpts = new DiagnosticOptions; - std::unique_ptr Opts(createDriverOptTable()); - unsigned MissingArgIndex, MissingArgCount; - InputArgList Args = - Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount); - // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. - // Any errors that would be diagnosed here will also be diagnosed later, - // when the DiagnosticsEngine actually exists. - (void)ParseDiagnosticArgs(*DiagOpts, Args); - return DiagOpts; -} - -static void SetInstallDir(SmallVectorImpl &argv, - Driver &TheDriver, bool CanonicalPrefixes) { - // Attempt to find the original path used to invoke the driver, to determine - // the installed path. We do this manually, because we want to support that - // path being a symlink. - SmallString<128> InstalledPath(argv[0]); - - // Do a PATH lookup, if there are no directory components. - if (llvm::sys::path::filename(InstalledPath) == InstalledPath) - if (llvm::ErrorOr Tmp = llvm::sys::findProgramByName( - llvm::sys::path::filename(InstalledPath.str()))) - InstalledPath = *Tmp; - - // FIXME: We don't actually canonicalize this, we just make it absolute. 
- if (CanonicalPrefixes) - llvm::sys::fs::make_absolute(InstalledPath); - - StringRef InstalledPathParent(llvm::sys::path::parent_path(InstalledPath)); - if (llvm::sys::fs::exists(InstalledPathParent)) - TheDriver.setInstalledDir(InstalledPathParent); -} - -static int ExecuteCC1Tool(ArrayRef argv, StringRef Tool) { - void *GetExecutablePathVP = (void *)(intptr_t) GetExecutablePath; - if (Tool == "") - return cc1_main(argv.slice(2), argv[0], GetExecutablePathVP); - if (Tool == "as") - return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP); - - // Reject unknown tools. - llvm::errs() << "error: unknown integrated tool '" << Tool << "'. " - << "Valid tools include '-cc1' and '-cc1as'.\n"; - return 1; -} - -int mainEntryClickHouseClang(int argc_, char **argv_) { - llvm::InitLLVM X(argc_, argv_); - SmallVector argv(argv_, argv_ + argc_); - - if (llvm::sys::Process::FixupStandardFileDescriptors()) - return 1; - - llvm::InitializeAllTargets(); - auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(argv[0]); - - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver(A); - - // Parse response files using the GNU syntax, unless we're in CL mode. There - // are two ways to put clang in CL compatibility mode: argv[0] is either - // clang-cl or cl, or --driver-mode=cl is on the command line. The normal - // command line parsing can't happen until after response file parsing, so we - // have to manually search for a --driver-mode=cl argument the hard way. - // Finally, our -cc1 tools don't care which tokenization mode we use because - // response files written by clang will tokenize the same way in either mode. - bool ClangCLMode = false; - if (StringRef(TargetAndMode.DriverMode).equals("--driver-mode=cl") || - std::find_if(argv.begin(), argv.end(), [](const char *F) { - return F && strcmp(F, "--driver-mode=cl") == 0; - }) != argv.end()) { - ClangCLMode = true; - } - enum { Default, POSIX, Windows } RSPQuoting = Default; - for (const char *F : argv) { - if (strcmp(F, "--rsp-quoting=posix") == 0) - RSPQuoting = POSIX; - else if (strcmp(F, "--rsp-quoting=windows") == 0) - RSPQuoting = Windows; - } - - // Determines whether we want nullptr markers in argv to indicate response - // files end-of-lines. We only use this for the /LINK driver argument with - // clang-cl.exe on Windows. - bool MarkEOLs = ClangCLMode; - - llvm::cl::TokenizerCallback Tokenizer; - if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) - Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; - else - Tokenizer = &llvm::cl::TokenizeGNUCommandLine; - - if (MarkEOLs && argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) - MarkEOLs = false; - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, argv, MarkEOLs); - - // Handle -cc1 integrated tools, even if -cc1 was expanded from a response - // file. - auto FirstArg = std::find_if(argv.begin() + 1, argv.end(), - [](const char *A) { return A != nullptr; }); - if (FirstArg != argv.end() && StringRef(*FirstArg).startswith("-cc1")) { - // If -cc1 came from a response file, remove the EOL sentinels. 
- if (MarkEOLs) { - auto newEnd = std::remove(argv.begin(), argv.end(), nullptr); - argv.resize(newEnd - argv.begin()); - } - return ExecuteCC1Tool(argv, argv[1] + 4); - } - - bool CanonicalPrefixes = true; - for (int i = 1, size = argv.size(); i < size; ++i) { - // Skip end-of-line response file markers - if (argv[i] == nullptr) - continue; - if (StringRef(argv[i]) == "-no-canonical-prefixes") { - CanonicalPrefixes = false; - break; - } - } - - // Handle CL and _CL_ which permits additional command line options to be - // prepended or appended. - if (ClangCLMode) { - // Arguments in "CL" are prepended. - llvm::Optional OptCL = llvm::sys::Process::GetEnv("CL"); - if (OptCL.hasValue()) { - SmallVector PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); - - // Insert right after the program name to prepend to the argument list. - argv.insert(argv.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); - } - // Arguments in "_CL_" are appended. - llvm::Optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); - if (Opt_CL_.hasValue()) { - SmallVector AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); - - // Insert at the end of the argument list to append. - argv.append(AppendedOpts.begin(), AppendedOpts.end()); - } - } - - std::set SavedStrings; - // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the - // scenes. - if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { - // FIXME: Driver shouldn't take extra initial argument. - ApplyQAOverride(argv, OverrideStr, SavedStrings); - } - - std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); - - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(argv); - - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); - FixupDiagPrefixExeName(DiagClient, Path); - - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); - - if (!DiagOpts->DiagnosticSerializationFile.empty()) { - auto SerializedConsumer = - clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); - Diags.setClient(new ChainedDiagnosticConsumer( - Diags.takeClient(), std::move(SerializedConsumer))); - } - - ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); - - Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); - SetInstallDir(argv, TheDriver, CanonicalPrefixes); - TheDriver.setTargetAndMode(TargetAndMode); - - insertTargetAndModeArgs(TargetAndMode, argv, SavedStrings); - - SetBackdoorDriverOutputsFromEnvVars(TheDriver); - - std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 1; - if (C && !C->containsError()) { - SmallVector, 4> FailingCommands; - Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } - - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; - - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. 
-      // On Windows, abort will return an exit code of 3. In these cases,
-      // generate additional diagnostic information if possible.
-      bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
-#ifdef _WIN32
-      DiagnoseCrash |= CommandRes == 3;
-#endif
-      if (DiagnoseCrash) {
-        TheDriver.generateCompilationDiagnostics(*C, *FailingCommand);
-        break;
-      }
-    }
-  }
-
-  Diags.getClient()->finish();
-
-  // If any timers were active but haven't been destroyed yet, print their
-  // results now. This happens in -disable-free mode.
-  llvm::TimerGroup::printAll(llvm::errs());
-
-#ifdef _WIN32
-  // Exit status should not be negative on Win32, unless abnormal termination.
-  // Once abnormal termination was caught, negative status should not be
-  // propagated.
-  if (Res < 0)
-    Res = 1;
-#endif
-
-  // If we have multiple failing commands, we return the result of the first
-  // failing command.
-  return Res;
-}
diff --git a/dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp b/dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp
deleted file mode 100644
index 203e50d42a9..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0bundled/lld.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "lld/Common/Driver.h"
-#include "llvm/Support/InitLLVM.h"
-#include <vector>
-
-int mainEntryClickHouseLLD(int argc, char ** argv)
-{
-    llvm::InitLLVM X(argc, argv);
-    std::vector<const char *> args(argv, argv + argc);
-    return !lld::elf::link(args, false);
-}
diff --git a/dbms/programs/clang/Compiler-7.0.0svn b/dbms/programs/clang/Compiler-7.0.0svn
deleted file mode 120000
index eeeb5bbc2c0..00000000000
--- a/dbms/programs/clang/Compiler-7.0.0svn
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-7.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/Compiler-7.0.1 b/dbms/programs/clang/Compiler-7.0.1
deleted file mode 120000
index eeeb5bbc2c0..00000000000
--- a/dbms/programs/clang/Compiler-7.0.1
+++ /dev/null
@@ -1 +0,0 @@
-Compiler-7.0.0
\ No newline at end of file
diff --git a/dbms/programs/clang/clickhouse-clang.cpp b/dbms/programs/clang/clickhouse-clang.cpp
deleted file mode 100644
index 261ae18b6d3..00000000000
--- a/dbms/programs/clang/clickhouse-clang.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-int mainEntryClickHouseClang(int argc, char ** argv);
-int main(int argc_, char ** argv_) { return mainEntryClickHouseClang(argc_, argv_); }
diff --git a/dbms/programs/clang/clickhouse-lld.cpp b/dbms/programs/clang/clickhouse-lld.cpp
deleted file mode 100644
index baa6182d66d..00000000000
--- a/dbms/programs/clang/clickhouse-lld.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-int mainEntryClickHouseLLD(int argc, char ** argv);
-int main(int argc_, char ** argv_) { return mainEntryClickHouseLLD(argc_, argv_); }
diff --git a/dbms/programs/clang/copy_headers.sh b/dbms/programs/clang/copy_headers.sh
deleted file mode 100755
index 45a58855c91..00000000000
--- a/dbms/programs/clang/copy_headers.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-#set -x
-#echo "Args: $*"; env | sort
-
-# This script collects all the header files needed to compile a certain translation unit
-# and copies them, preserving their paths, into the DST directory.
-# The result can then be used to compile that translation unit on another server,
-# using exactly the same set of header files.
-#
-# Requires clang, preferably the most recent one (trunk).
-#
-# Used when building packages.
-# The header files are written into the clickhouse-common package, into the /usr/share/clickhouse/headers directory.
-#
-# If you want to install them yourself, without building the package,
-# so that clickhouse-server finds them where it expects, run:
-#
-# sudo ./copy_headers.sh . /usr/share/clickhouse/headers/
-
-SOURCE_PATH=${1:-../../..}
-DST=${2:-$SOURCE_PATH/../headers}
-BUILD_PATH=${BUILD_PATH=${3:-$SOURCE_PATH/build}}
-
-PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:$PATH"
-
-if [[ -z $CLANG ]]; then
-    CLANG="clang"
-fi
-
-START_HEADERS=$(echo \
-    $BUILD_PATH/dbms/src/Common/config_version.h \
-    $SOURCE_PATH/dbms/src/Interpreters/SpecializedAggregator.h \
-    $SOURCE_PATH/dbms/src/AggregateFunctions/AggregateFunction*.h)
-
-for header in $START_HEADERS; do
-    START_HEADERS_INCLUDE+="-include $header "
-done
-
-
-GCC_ROOT=`$CLANG -v 2>&1 | grep "Selected GCC installation"| sed -n -e 's/^.*: //p'`
-
-# TODO: Does not work on macos?
-GCC_ROOT=${GCC_ROOT:=/usr/lib/clang/${CMAKE_CXX_COMPILER_VERSION}}
-
-# The -mcx16 option is passed so that more header files are picked up (with some margin).
-# The latter options are the same that are added while building packages.
-for src_file in $(echo | $CLANG -M -xc++ -std=c++1z -Wall -Werror -msse2 -msse4 -mcx16 -mpopcnt -O3 -g -fPIC -fstack-protector -D_FORTIFY_SOURCE=2 \
-    -I $GCC_ROOT/include \
-    -I $GCC_ROOT/include-fixed \
-    $(cat "$BUILD_PATH/include_directories.txt") \
-    $START_HEADERS_INCLUDE \
-    - |
-    tr -d '\\' |
-    sed -E -e 's/^-\.o://');
-do
-    dst_file=$src_file;
-    [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!")
-    [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!")
-    dst_file=$(echo $dst_file | sed -E -e 's/build\///') # for simplicity reasons, will put generated headers near the rest.
-    mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')";
-    cp "$src_file" "$DST/$dst_file";
-done
-
-
-# Copy more header files with intrinsics, because the servers where these header files will be installed
-# will be using the -march=native option.
- -for src_file in $(ls -1 $($CLANG -v -xc++ - <<<'' 2>&1 | grep '^ /' | grep 'include' | grep -E '/lib/clang/|/include/clang/')/*.h | grep -vE 'arm|altivec|Intrin'); -do - dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$dst_file"; -done - -if [ -d "$SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detail" ]; then - # Even more platform-specific headers - for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detail/*); - do - dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$dst_file"; - done -fi - -if [ -d "$SOURCE_PATH/contrib/boost/boost/smart_ptr/detail" ]; then - for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/boost/smart_ptr/detail/*); - do - dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$dst_file"; - done -fi diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 57821d854e9..3fbbcee0f15 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -56,11 +56,6 @@ int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif -#if USE_EMBEDDED_COMPILER - int mainEntryClickHouseClang(int argc, char ** argv); - int mainEntryClickHouseLLD(int argc, char ** argv); -#endif - namespace { @@ -100,12 +95,6 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) {"obfuscator", mainEntryClickHouseObfuscator}, #endif - -#if USE_EMBEDDED_COMPILER - {"clang", mainEntryClickHouseClang}, - {"clang++", mainEntryClickHouseClang}, - {"lld", mainEntryClickHouseLLD}, -#endif }; @@ -152,11 +141,6 @@ int main(int argc_, char ** argv_) /// will work only after additional call of this function. updatePHDRCache(); -#if USE_EMBEDDED_COMPILER - if (argc_ >= 2 && 0 == strcmp(argv_[1], "-cc1")) - return mainEntryClickHouseClang(argc_, argv_); -#endif - #if USE_TCMALLOC /** Without this option, tcmalloc returns memory to OS too frequently for medium-sized memory allocations * (like IO buffers, column vectors, hash tables, etc.), diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index b7db44700bb..3adae6d9e93 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -85,7 +85,6 @@ struct Settings : public SettingsCollection M(SettingTotalsMode, totals_mode, TotalsMode::AFTER_HAVING_EXCLUSIVE, "How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = ‘any’ are present.") \ M(SettingFloat, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.") \ \ - M(SettingBool, compile, false, "Whether query compilation is enabled.") \ M(SettingBool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). 
Enabling this may increase merge times and memory consumption.") \ M(SettingBool, compile_expressions, false, "Compile some scalar functions and operators to native code.") \ M(SettingUInt64, min_count_to_compile, 3, "The number of structurally identical queries before they are compiled.") \ @@ -351,6 +350,7 @@ struct Settings : public SettingsCollection /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \ + M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13") \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 33fbb903497..373b47f7315 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -25,10 +25,6 @@ #include #include -#if __has_include() -#include -#endif - namespace ProfileEvents { @@ -47,7 +43,6 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_COMPILE_CODE; extern const int TOO_MANY_ROWS; extern const int EMPTY_DATA_PASSED; extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS; @@ -195,200 +190,6 @@ Aggregator::Aggregator(const Params & params_) } -void Aggregator::compileIfPossible(AggregatedDataVariants::Type type) -{ - std::lock_guard lock(mutex); - - if (compiled_if_possible) - return; - - compiled_if_possible = true; - -#if !defined(INTERNAL_COMPILER_HEADERS) - throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE); -#else - std::string method_typename_single_level; - std::string method_typename_two_level; - - if (false) {} -#define M(NAME) \ - else if (type == AggregatedDataVariants::Type::NAME) \ - { \ - method_typename_single_level = "decltype(AggregatedDataVariants::" #NAME ")::element_type"; \ - method_typename_two_level = "decltype(AggregatedDataVariants::" #NAME "_two_level)::element_type"; \ - } - - APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) -#undef M - -#define M(NAME) \ - else if (type == AggregatedDataVariants::Type::NAME) \ - method_typename_single_level = "decltype(AggregatedDataVariants::" #NAME ")::element_type"; - - APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) -#undef M - else if (type == AggregatedDataVariants::Type::without_key) {} - else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - - auto compiler_headers = Poco::Util::Application::instance().config().getString("compiler_headers", INTERNAL_COMPILER_HEADERS); - - /// List of types of aggregate functions. - std::stringstream aggregate_functions_typenames_str; - std::stringstream aggregate_functions_headers_args; - for (size_t i = 0; i < params.aggregates_size; ++i) - { - IAggregateFunction & func = *aggregate_functions[i]; - - int status = 0; - std::string type_name = demangle(typeid(func).name(), status); - - if (status) - throw Exception("Cannot compile code: cannot demangle name " + String(typeid(func).name()) - + ", status: " + toString(status), ErrorCodes::CANNOT_COMPILE_CODE); - - aggregate_functions_typenames_str << ((i != 0) ? 
", " : "") << type_name; - - std::string header_path = func.getHeaderFilePath(); - auto pos = header_path.find("/AggregateFunctions/"); - - if (pos == std::string::npos) - throw Exception("Cannot compile code: unusual path of header file for aggregate function: " + header_path, - ErrorCodes::CANNOT_COMPILE_CODE); - - aggregate_functions_headers_args << "-include '" << compiler_headers << "/dbms/src"; - aggregate_functions_headers_args.write(&header_path[pos], header_path.size() - pos); - aggregate_functions_headers_args << "' "; - } - - aggregate_functions_headers_args << "-include '" << compiler_headers << "/dbms/src/Interpreters/SpecializedAggregator.h'"; - - std::string aggregate_functions_typenames = aggregate_functions_typenames_str.str(); - - std::stringstream key_str; - key_str << "Aggregate: "; - if (!method_typename_single_level.empty()) - key_str << method_typename_single_level + ", "; - key_str << aggregate_functions_typenames; - std::string key = key_str.str(); - - auto get_code = [method_typename_single_level, method_typename_two_level, aggregate_functions_typenames] - { - /// A short piece of code, which is an explicit instantiation of the template. - std::stringstream code; - code << /// No explicit inclusion of the header file. It is included using the -include compiler option. - "namespace DB\n" - "{\n" - "\n"; - - /// There can be up to two instantiations for the template - for normal and two_level options. - auto append_code_for_specialization = - [&code, &aggregate_functions_typenames] (const std::string & method_typename, const std::string & suffix) - { - code << - "template void Aggregator::executeSpecialized<\n" - " " << method_typename << ", TypeList<" << aggregate_functions_typenames << ">>(\n" - " " << method_typename << " &, Arena *, size_t, ColumnRawPtrs &,\n" - " AggregateColumns &, bool, AggregateDataPtr) const;\n" - "\n" - "static void wrapper" << suffix << "(\n" - " const Aggregator & aggregator,\n" - " " << method_typename << " & method,\n" - " Arena * arena,\n" - " size_t rows,\n" - " ColumnRawPtrs & key_columns,\n" - " Aggregator::AggregateColumns & aggregate_columns,\n" - " bool no_more_keys,\n" - " AggregateDataPtr overflow_row)\n" - "{\n" - " aggregator.executeSpecialized<\n" - " " << method_typename << ", TypeList<" << aggregate_functions_typenames << ">>(\n" - " method, arena, rows, key_columns, aggregate_columns, no_more_keys, overflow_row);\n" - "}\n" - "\n" - "void * getPtr" << suffix << "() __attribute__((__visibility__(\"default\")));\n" - "void * getPtr" << suffix << "()\n" /// Without this wrapper, it's not clear how to get the desired symbol from the compiled library. - "{\n" - " return reinterpret_cast(&wrapper" << suffix << ");\n" - "}\n"; - }; - - if (!method_typename_single_level.empty()) - append_code_for_specialization(method_typename_single_level, ""); - else - { - /// For `without_key` method. 
- code << - "template void Aggregator::executeSpecializedWithoutKey<\n" - " " << "TypeList<" << aggregate_functions_typenames << ">>(\n" - " AggregatedDataWithoutKey &, size_t, AggregateColumns &, Arena *) const;\n" - "\n" - "static void wrapper(\n" - " const Aggregator & aggregator,\n" - " AggregatedDataWithoutKey & method,\n" - " size_t rows,\n" - " Aggregator::AggregateColumns & aggregate_columns,\n" - " Arena * arena)\n" - "{\n" - " aggregator.executeSpecializedWithoutKey<\n" - " TypeList<" << aggregate_functions_typenames << ">>(\n" - " method, rows, aggregate_columns, arena);\n" - "}\n" - "\n" - "void * getPtr() __attribute__((__visibility__(\"default\")));\n" - "void * getPtr()\n" - "{\n" - " return reinterpret_cast(&wrapper);\n" - "}\n"; - } - - if (!method_typename_two_level.empty()) - append_code_for_specialization(method_typename_two_level, "TwoLevel"); - else - { - /// The stub. - code << - "void * getPtrTwoLevel() __attribute__((__visibility__(\"default\")));\n" - "void * getPtrTwoLevel()\n" - "{\n" - " return nullptr;\n" - "}\n"; - } - - code << - "}\n"; - - return code.str(); - }; - - auto compiled_data_owned_by_callback = compiled_data; - auto on_ready = [compiled_data_owned_by_callback] (SharedLibraryPtr & lib) - { - if (compiled_data_owned_by_callback.unique()) /// Aggregator is already destroyed. - return; - - compiled_data_owned_by_callback->compiled_aggregator = lib; - compiled_data_owned_by_callback->compiled_method_ptr = lib->get("_ZN2DB6getPtrEv")(); - compiled_data_owned_by_callback->compiled_two_level_method_ptr = lib->get("_ZN2DB14getPtrTwoLevelEv")(); - }; - - /** If the library has already been compiled, a non-zero SharedLibraryPtr is returned. - * If the library was not compiled, then the counter is incremented, and nullptr is returned. - * If the counter has reached the value min_count_to_compile, then the compilation starts asynchronously (in a separate thread) - * at the end of which `on_ready` callback is called. - */ - aggregate_functions_headers_args << " -Wno-unused-function"; - SharedLibraryPtr lib = params.compiler->getOrCount(key, params.min_count_to_compile, - aggregate_functions_headers_args.str(), - get_code, on_ready); - - /// If the result is already ready. - if (lib) - on_ready(lib); -#endif -} - - AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() { /// If no keys. All aggregating to single row. @@ -720,9 +521,6 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re result.keys_size = params.keys_size; result.key_sizes = key_sizes; LOG_TRACE(log, "Aggregation method: " << result.getMethodName()); - - if (params.compiler) - compileIfPossible(result.type); } if (isCancelled()) @@ -794,67 +592,21 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re /// For the case when there are no keys (all aggregate into one row). if (result.type == AggregatedDataVariants::Type::without_key) { - /// If there is a dynamically compiled code. 
- if (compiled_data->compiled_method_ptr) - { - reinterpret_cast< - void (*)(const Aggregator &, AggregatedDataWithoutKey &, size_t, AggregateColumns &, Arena *)> - (compiled_data->compiled_method_ptr)(*this, result.without_key, rows, aggregate_columns, result.aggregates_pool); - } - else - executeWithoutKeyImpl(result.without_key, rows, aggregate_functions_instructions.data(), result.aggregates_pool); + executeWithoutKeyImpl(result.without_key, rows, aggregate_functions_instructions.data(), result.aggregates_pool); } else { /// This is where data is written that does not fit in `max_rows_to_group_by` with `group_by_overflow_mode = any`. AggregateDataPtr overflow_row_ptr = params.overflow_row ? result.without_key : nullptr; - bool is_two_level = result.isTwoLevel(); - - /// Compiled code, for the normal structure. - if (!is_two_level && compiled_data->compiled_method_ptr) - { - #define M(NAME, IS_TWO_LEVEL) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - reinterpret_cast(compiled_data->compiled_method_ptr) \ - (*this, *result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \ - no_more_keys, overflow_row_ptr); - - if (false) {} - APPLY_FOR_AGGREGATED_VARIANTS(M) - #undef M - } - /// Compiled code, for a two-level structure. - else if (is_two_level && compiled_data->compiled_two_level_method_ptr) - { - #define M(NAME) \ - else if (result.type == AggregatedDataVariants::Type::NAME) \ - reinterpret_cast(compiled_data->compiled_two_level_method_ptr) \ - (*this, *result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \ - no_more_keys, overflow_row_ptr); - - if (false) {} - APPLY_FOR_VARIANTS_TWO_LEVEL(M) - #undef M - } - /// When there is no dynamically compiled code. - else - { #define M(NAME, IS_TWO_LEVEL) \ else if (result.type == AggregatedDataVariants::Type::NAME) \ executeImpl(*result.NAME, result.aggregates_pool, rows, key_columns, aggregate_functions_instructions.data(), \ no_more_keys, overflow_row_ptr); - if (false) {} - APPLY_FOR_AGGREGATED_VARIANTS(M) + if (false) {} + APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M - } } size_t result_size = result.sizeWithoutOverflowRow(); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 41fd957345e..b48663ff689 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -778,10 +777,6 @@ public: const size_t max_rows_to_group_by; const OverflowMode group_by_overflow_mode; - /// For dynamic compilation. - Compiler * compiler; - const UInt32 min_count_to_compile; - /// Two-level aggregation settings (used for a large number of keys). /** With how many keys or the size of the aggregation state in bytes, * two-level aggregation begins to be used. Enough to reach of at least one of the thresholds. 
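For context on the executeOnBlock hunk in Aggregator.cpp above: with the compiled-code branches removed, aggregation always dispatches through the APPLY_FOR_AGGREGATED_VARIANTS X-macro into the generic executeImpl. The sketch below is an editorial illustration, not part of the patch; it shows the same "if (false) {} / else if" X-macro dispatch pattern with toy stand-ins (Variant, MethodKey8, dispatch and the variant list are assumed names, not ClickHouse code).

#include <iostream>

// A minimal sketch of the X-macro "else if" dispatch kept by this change:
// one "if (false) {}" anchor followed by an else-if branch per variant.
enum class Variant { key8, key16, key_string };

struct MethodKey8 {}; struct MethodKey16 {}; struct MethodKeyString {};

template <typename Method>
void executeImpl(const char * name)
{
    // The real code aggregates rows for the chosen hash-table layout; here we just log.
    std::cout << "dispatched to " << name << '\n';
}

// The variant list is written once and expanded wherever a per-variant branch is needed.
#define APPLY_FOR_AGGREGATED_VARIANTS(M) \
    M(key8, MethodKey8)                  \
    M(key16, MethodKey16)                \
    M(key_string, MethodKeyString)

void dispatch(Variant type)
{
    if (false) {} // anchor so that every expanded branch can start with "else if"
#define M(NAME, METHOD) \
    else if (type == Variant::NAME) executeImpl<METHOD>(#NAME);
    APPLY_FOR_AGGREGATED_VARIANTS(M)
#undef M
}

int main()
{
    dispatch(Variant::key16); // prints: dispatched to key16
}

Keeping the variant list in one macro and stamping out the else-if chain wherever a per-variant branch is needed is what lets the patch drop the dynamically compiled specializations while leaving a single generic dispatch path behind.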
@@ -805,7 +800,6 @@ public: const Block & src_header_, const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_, - Compiler * compiler_, UInt32 min_count_to_compile_, size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_, size_t max_bytes_before_external_group_by_, bool empty_result_for_aggregation_by_empty_set_, @@ -813,7 +807,6 @@ public: : src_header(src_header_), keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()), overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_), - compiler(compiler_), min_count_to_compile(min_count_to_compile_), group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_), max_bytes_before_external_group_by(max_bytes_before_external_group_by_), empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_), @@ -824,7 +817,7 @@ public: /// Only parameters that matter during merge. Params(const Block & intermediate_header_, const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_) - : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, nullptr, 0, 0, 0, 0, false, "", max_threads_) + : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, "", max_threads_) { intermediate_header = intermediate_header_; } @@ -956,26 +949,6 @@ protected: Logger * log = &Logger::get("Aggregator"); - /** Dynamically compiled library for aggregation, if any. - * The meaning of dynamic compilation is to specialize code - * for a specific list of aggregate functions. - * This allows you to expand the loop to create and update states of aggregate functions, - * and also use inline-code instead of virtual calls. - */ - struct CompiledData - { - SharedLibraryPtr compiled_aggregator; - - /// Obtained with dlsym. It is still necessary to make reinterpret_cast to the function pointer. - void * compiled_method_ptr = nullptr; - void * compiled_two_level_method_ptr = nullptr; - }; - /// shared_ptr - to pass into a callback, that can survive Aggregator. - std::shared_ptr compiled_data { new CompiledData }; - - bool compiled_if_possible = false; - void compileIfPossible(AggregatedDataVariants::Type type); - /// Returns true if you can abort the current task. CancellationHook isCancelled; @@ -1037,35 +1010,6 @@ protected: Method & method, IBlockOutputStream & out); -public: - /// Templates that are instantiated by dynamic code compilation - see SpecializedAggregator.h - - template - void executeSpecialized( - Method & method, - Arena * aggregates_pool, - size_t rows, - ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, - bool no_more_keys, - AggregateDataPtr overflow_row) const; - - template - void executeSpecializedCase( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - size_t rows, - AggregateColumns & aggregate_columns, - AggregateDataPtr overflow_row) const; - - template - void executeSpecializedWithoutKey( - AggregatedDataWithoutKey & res, - size_t rows, - AggregateColumns & aggregate_columns, - Arena * arena) const; - protected: /// Merge NULL key data from hash table `src` into `dst`. 
template diff --git a/dbms/src/Interpreters/CMakeLists.txt b/dbms/src/Interpreters/CMakeLists.txt index 75771a07027..65172356645 100644 --- a/dbms/src/Interpreters/CMakeLists.txt +++ b/dbms/src/Interpreters/CMakeLists.txt @@ -1,70 +1,3 @@ - -if (OS_FREEBSD) - set (PATH_SHARE "/usr/local/share" CACHE STRING "") -else () - set (PATH_SHARE "/usr/share" CACHE STRING "") -endif () - -set (INTERNAL_COMPILER_BIN_ROOT "${CMAKE_INSTALL_FULL_BINDIR}/" CACHE STRING "") -set (INTERNAL_COMPILER_EXECUTABLE "clickhouse-clang" CACHE STRING "") -set (INTERNAL_LINKER_EXECUTABLE "clickhouse-lld" CACHE STRING "") - -# Disabling leak reporting for these tools -if (SANITIZE STREQUAL "address") - # Note that this doesn't work for setuid and setcap binaries - set(INTERNAL_COMPILER_ENV "env ASAN_OPTIONS=detect_leaks=0" CACHE STRING "") -else () - set(INTERNAL_COMPILER_ENV "" CACHE STRING "") -endif () - -set (INTERNAL_COMPILER_NO_WARNING OFF CACHE INTERNAL "") -set (INTERNAL_COMPILER_HEADERS_DIR "headers" CACHE STRING "") -set (INTERNAL_COMPILER_HEADERS_RELATIVE "${INTERNAL_COMPILER_HEADERS_DIR}/${VERSION_STRING}" CACHE STRING "") -set (INTERNAL_COMPILER_HEADERS "${PATH_SHARE}/clickhouse/${INTERNAL_COMPILER_HEADERS_RELATIVE}" CACHE STRING "") - -if(OS_FREEBSD) - set(INTERNAL_COMPILER_HEADERS_ROOT "" CACHE STRING "") -else() - set(INTERNAL_COMPILER_HEADERS_ROOT "${INTERNAL_COMPILER_HEADERS}" CACHE STRING "") - set(INTERNAL_COMPILER_CUSTOM_ROOT ON CACHE INTERNAL "") -endif() - -if(NOT INTERNAL_COMPILER_FLAGS) - set(INTERNAL_COMPILER_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UC}} ${CXX_FLAGS_INTERNAL_COMPILER} -x c++ -march=native -fPIC -fvisibility=hidden -fno-implement-inlines -Wno-unused-command-line-argument -Bprefix=${PATH_SHARE}/clickhouse" CACHE STRING "") - if(INTERNAL_COMPILER_CUSTOM_ROOT) - set(INTERNAL_COMPILER_FLAGS "${INTERNAL_COMPILER_FLAGS} -nostdinc -nostdinc++") - if(INTERNAL_COMPILER_HEADERS_ROOT) - set(INTERNAL_COMPILER_FLAGS "${INTERNAL_COMPILER_FLAGS} -isysroot=${INTERNAL_COMPILER_HEADERS_ROOT}") - endif() - endif() -endif() -# TODO: use libs from package: -nodefaultlibs -lm -lc -lgcc_s -lgcc -lc++ -lc++abi - -string(REPLACE "${INCLUDE_DEBUG_HELPERS}" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) -string(REPLACE "-no-pie" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) -if (INTERNAL_COMPILER_NO_WARNING) - string (REPLACE "-Wall" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) - string (REPLACE "-Wextra" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) - string (REPLACE "-Werror" "" INTERNAL_COMPILER_FLAGS ${INTERNAL_COMPILER_FLAGS}) -endif () - -list(GET Poco_INCLUDE_DIRS 0 Poco_Foundation_INCLUDE_DIR) -list(GET Poco_INCLUDE_DIRS 1 Poco_Util_INCLUDE_DIR) - -if (NOT DOUBLE_CONVERSION_INCLUDE_DIR) - get_target_property(DOUBLE_CONVERSION_INCLUDE_DIR ${DOUBLE_CONVERSION_LIBRARIES} INTERFACE_INCLUDE_DIRECTORIES) -endif () - -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR ${DOUBLE_CONVERSION_INCLUDE_DIR}) -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_Poco_Foundation_INCLUDE_DIR ${Poco_Foundation_INCLUDE_DIR}) -string (REPLACE ${ClickHouse_SOURCE_DIR} "" INTERNAL_Poco_Util_INCLUDE_DIR ${Poco_Util_INCLUDE_DIR}) - -message (STATUS "Using internal=${USE_INTERNAL_LLVM_LIBRARY} compiler=${USE_EMBEDDED_COMPILER}: headers=${INTERNAL_COMPILER_HEADERS} root=${INTERNAL_COMPILER_HEADERS_ROOT}: ${INTERNAL_COMPILER_ENV} 
${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE} ${INTERNAL_COMPILER_FLAGS}; ${INTERNAL_LINKER_EXECUTABLE}") - -set (CONFIG_COMPILE ${ClickHouse_BINARY_DIR}/dbms/src/Interpreters/config_compile.h) -configure_file (${ClickHouse_SOURCE_DIR}/dbms/src/Interpreters/config_compile.h.in ${CONFIG_COMPILE}) - if (ENABLE_TESTS) add_subdirectory (tests) endif () diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp deleted file mode 100644 index 3b420b6acce..00000000000 --- a/dbms/src/Interpreters/Compiler.cpp +++ /dev/null @@ -1,326 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if __has_include() -#include -#endif - -namespace ProfileEvents -{ - extern const Event CompileAttempt; - extern const Event CompileSuccess; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_DLOPEN; - extern const int CANNOT_COMPILE_CODE; -} - -Compiler::Compiler(const std::string & path_, size_t threads) - : path(path_), pool(threads) -{ - Poco::File(path).createDirectory(); - - Poco::DirectoryIterator dir_end; - for (Poco::DirectoryIterator dir_it(path); dir_end != dir_it; ++dir_it) - { - const std::string & name = dir_it.name(); - if (endsWith(name, ".so")) - { - files.insert(name.substr(0, name.size() - 3)); - } - } - - LOG_INFO(log, "Having " << files.size() << " compiled files from previous start."); -} - -Compiler::~Compiler() -{ - LOG_DEBUG(log, "Waiting for threads to finish."); - pool.wait(); -} - - -static Compiler::HashedKey getHash(const std::string & key) -{ - SipHash hash; - - auto revision = ClickHouseRevision::get(); - hash.update(revision); - hash.update(key.data(), key.size()); - - Compiler::HashedKey res; - hash.get128(res.low, res.high); - return res; -} - - -/// Without .so extension. -static std::string hashedKeyToFileName(Compiler::HashedKey hashed_key) -{ - WriteBufferFromOwnString out; - out << hashed_key.low << '_' << hashed_key.high; - return out.str(); -} - - -SharedLibraryPtr Compiler::getOrCount( - const std::string & key, - UInt32 min_count_to_compile, - const std::string & additional_compiler_flags, - CodeGenerator get_code, - ReadyCallback on_ready) -{ - HashedKey hashed_key = getHash(key); - - std::lock_guard lock(mutex); - - UInt32 count = ++counts[hashed_key]; - - /// Is there a ready open library? Or, if the library is in the process of compiling, there will be nullptr. - Libraries::iterator libraries_it = libraries.find(hashed_key); - if (libraries.end() != libraries_it) - { - if (!libraries_it->second) - LOG_INFO(log, "Library " << hashedKeyToFileName(hashed_key) << " is already compiling or compilation was failed."); - - /// TODO In this case, after the compilation is finished, the callback will not be called. - - return libraries_it->second; - } - - /// Is there a file with the library left over from the previous launch? - std::string file_name = hashedKeyToFileName(hashed_key); - Files::iterator files_it = files.find(file_name); - if (files.end() != files_it) - { - std::string so_file_path = path + '/' + file_name + ".so"; - LOG_INFO(log, "Loading existing library " << so_file_path); - - SharedLibraryPtr lib; - - try - { - lib = std::make_shared(so_file_path); - } - catch (const Exception & e) - { - if (e.code() != ErrorCodes::CANNOT_DLOPEN) - throw; - - /// Found broken .so file (or file cannot be dlopened by whatever reason). - /// This could happen when filesystem is corrupted after server restart. 
- /// We remove the file - it will be recompiled on next attempt. - - tryLogCurrentException(log); - - files.erase(files_it); - Poco::File(so_file_path).remove(); - return nullptr; - } - - libraries[hashed_key] = lib; - return lib; - } - - /// Has min_count_to_compile been reached? - if (count >= min_count_to_compile) - { - /// The min_count_to_compile value of zero indicates the need for synchronous compilation. - - /// Indicates that the library is in the process of compiling. - libraries[hashed_key] = nullptr; - - LOG_INFO(log, "Compiling code " << file_name << ", key: " << key); - - if (min_count_to_compile == 0) - { - { - ext::unlock_guard unlock(mutex); - compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready); - } - - return libraries[hashed_key]; - } - else - { - bool res = pool.trySchedule([=] - { - try - { - compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready); - } - catch (...) - { - tryLogCurrentException("Compiler"); - } - }); - - if (!res) - LOG_INFO(log, "All threads are busy."); - } - } - - return nullptr; -} - - -/// This will guarantee that code will compile only when version of headers match version of running server. -static void addCodeToAssertHeadersMatch(WriteBuffer & out) -{ - out << - "#include \n" - "#if VERSION_REVISION != " << ClickHouseRevision::get() << "\n" - "#define STRING2(x) #x\n" - "#define STRING(x) STRING2(x)\n" - "#pragma message \"ClickHouse headers revision = \" STRING(VERSION_REVISION) \n" - "#error \"ClickHouse headers revision doesn't match runtime revision of the server (" << ClickHouseRevision::get() << ").\"\n" - "#endif\n\n"; -} - - -void Compiler::compile( - HashedKey hashed_key, - std::string file_name, - const std::string & additional_compiler_flags, - CodeGenerator get_code, - ReadyCallback on_ready) -{ - ProfileEvents::increment(ProfileEvents::CompileAttempt); - -#if !defined(INTERNAL_COMPILER_EXECUTABLE) - throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE); -#else - std::string prefix = path + "/" + file_name; - std::string cpp_file_path = prefix + ".cpp"; - std::string so_file_path = prefix + ".so"; - std::string so_tmp_file_path = prefix + ".so.tmp"; - - { - WriteBufferFromFile out(cpp_file_path); - - addCodeToAssertHeadersMatch(out); - out << get_code(); - } - - std::stringstream command; - - auto compiler_executable_root = Poco::Util::Application::instance().config().getString("compiler_executable_root", INTERNAL_COMPILER_BIN_ROOT); - auto compiler_headers = Poco::Util::Application::instance().config().getString("compiler_headers", INTERNAL_COMPILER_HEADERS); - auto compiler_headers_root = Poco::Util::Application::instance().config().getString("compiler_headers_root", INTERNAL_COMPILER_HEADERS_ROOT); - LOG_DEBUG(log, "Using internal compiler: compiler_executable_root=" << compiler_executable_root << "; compiler_headers_root=" << compiler_headers_root << "; compiler_headers=" << compiler_headers); - - /// Slightly unconvenient. - command << - "(" - INTERNAL_COMPILER_ENV - " " << compiler_executable_root << INTERNAL_COMPILER_EXECUTABLE - " " INTERNAL_COMPILER_FLAGS - /// It is hard to correctly call a ld program manually, because it is easy to skip critical flags, which might lead to - /// unhandled exceptions. Therefore pass path to llvm's lld directly to clang. - " -fuse-ld=" << compiler_executable_root << INTERNAL_LINKER_EXECUTABLE - " -fdiagnostics-color=never" - - /// Do not use libgcc and startup files. 
The library will work nevertheless and we avoid extra dependency. - " -nodefaultlibs -nostartfiles" - - #if INTERNAL_COMPILER_CUSTOM_ROOT - /// To get correct order merge this results carefully: - /// echo | clang -x c++ -E -Wp,-v - - /// echo | g++ -x c++ -E -Wp,-v - - - " -isystem " << compiler_headers_root << "/usr/include/c++/*" - #if defined(CMAKE_LIBRARY_ARCHITECTURE) - " -isystem " << compiler_headers_root << "/usr/include/" CMAKE_LIBRARY_ARCHITECTURE "/c++/*" - #endif - " -isystem " << compiler_headers_root << "/usr/include/c++/*/backward" - " -isystem " << compiler_headers_root << "/usr/include/clang/*/include" /// if compiler is clang (from package) - " -isystem " << compiler_headers_root << "/usr/local/lib/clang/*/include" /// if clang installed manually - " -isystem " << compiler_headers_root << "/usr/lib/clang/*/include" /// if clang build from submodules - #if defined(CMAKE_LIBRARY_ARCHITECTURE) - " -isystem " << compiler_headers_root << "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include-fixed" - " -isystem " << compiler_headers_root << "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include" - #endif - " -isystem " << compiler_headers_root << "/usr/local/include" /// if something installed manually - #if defined(CMAKE_LIBRARY_ARCHITECTURE) - " -isystem " << compiler_headers_root << "/usr/include/" CMAKE_LIBRARY_ARCHITECTURE - #endif - " -isystem " << compiler_headers_root << "/usr/include" - #endif - " -I " << compiler_headers << "/dbms/src/" - " -isystem " << compiler_headers << "/contrib/cityhash102/include/" - " -isystem " << compiler_headers << "/contrib/libpcg-random/include/" - #if USE_MIMALLOC - " -isystem " << compiler_headers << "/contrib/mimalloc/include/" - #endif - " -isystem " << compiler_headers << INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR - " -isystem " << compiler_headers << INTERNAL_Poco_Foundation_INCLUDE_DIR - " -isystem " << compiler_headers << INTERNAL_Boost_INCLUDE_DIRS - " -I " << compiler_headers << "/libs/libcommon/include/" - " " << additional_compiler_flags << - " -shared -o " << so_tmp_file_path << " " << cpp_file_path - << " 2>&1" - ") || echo Return code: $?"; - -#ifndef NDEBUG - LOG_TRACE(log, "Compile command: " << command.str()); -#endif - - std::string compile_result; - - { - auto process = ShellCommand::execute(command.str()); - readStringUntilEOF(compile_result, process->out); - process->wait(); - } - - if (!compile_result.empty()) - { - std::string error_message = "Cannot compile code:\n\n" + command.str() + "\n\n" + compile_result; - - Poco::File so_tmp_file(so_tmp_file_path); - if (so_tmp_file.exists() && so_tmp_file.canExecute()) - { - /// Compiler may emit information messages. This is suspicious, but we still can use compiled result. - LOG_WARNING(log, error_message); - } - else - throw Exception(error_message, ErrorCodes::CANNOT_COMPILE_CODE); - } - - /// If there was an error before, the file with the code remains for viewing. 
- Poco::File(cpp_file_path).remove(); - - Poco::File(so_tmp_file_path).renameTo(so_file_path); - SharedLibraryPtr lib(new SharedLibrary(so_file_path)); - - { - std::lock_guard lock(mutex); - libraries[hashed_key] = lib; - } - - LOG_INFO(log, "Compiled code " << file_name); - ProfileEvents::increment(ProfileEvents::CompileSuccess); - - on_ready(lib); - -#endif -} - - -} diff --git a/dbms/src/Interpreters/Compiler.h b/dbms/src/Interpreters/Compiler.h deleted file mode 100644 index b79cf26e0f0..00000000000 --- a/dbms/src/Interpreters/Compiler.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace DB -{ - -/** Lets you compile a piece of code that uses the server's header files into the dynamic library. - * Conducts statistic of calls, and initiates compilation only on the N-th call for one key. - * Compilation is performed asynchronously, in separate threads, if there are free threads. - * NOTE: There is no cleaning of obsolete and unnecessary results. - */ -class Compiler -{ -public: - /** path - path to the directory with temporary files - the results of the compilation. - * The compilation results are saved when the server is restarted, - * but use the revision number as part of the key. That is, they become obsolete when the server is updated. - */ - Compiler(const std::string & path_, size_t threads); - ~Compiler(); - - using HashedKey = UInt128; - - using CodeGenerator = std::function; - using ReadyCallback = std::function; - - /** Increase the counter for the given key `key` by one. - * If the compilation result already exists (already open, or there is a file with the library), - * then return ready SharedLibrary. - * Otherwise, if min_count_to_compile == 0, then initiate the compilation in the same thread, wait for it, and return the result. - * Otherwise, if the counter has reached min_count_to_compile, - * initiate compilation in a separate thread, if there are free threads, and return nullptr. - * Otherwise, return nullptr. - */ - SharedLibraryPtr getOrCount( - const std::string & key, - UInt32 min_count_to_compile, - const std::string & additional_compiler_flags, - CodeGenerator get_code, - ReadyCallback on_ready); - -private: - using Counts = std::unordered_map; - using Libraries = std::unordered_map; - using Files = std::unordered_set; - - const std::string path; - ThreadPool pool; - - /// Number of calls to `getOrCount`. - Counts counts; - - /// Compiled and open libraries. Or nullptr for libraries in the compilation process. - Libraries libraries; - - /// Compiled files remaining from previous runs, but not yet open. - Files files; - - std::mutex mutex; - - Logger * log = &Logger::get("Compiler"); - - - void compile( - HashedKey hashed_key, - std::string file_name, - const std::string & additional_compiler_flags, - CodeGenerator get_code, - ReadyCallback on_ready); -}; - -} diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 53392f9ad9a..67a81a94b3a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -140,7 +139,6 @@ struct ContextShared std::optional background_pool; /// The thread pool for the background work performed by the tables. 
std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) MultiVersion macros; /// Substitutions extracted from config. - std::optional compiler; /// Used for dynamic compilation of queries' parts if it necessary. std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. mutable std::unique_ptr compression_codec_selector; @@ -1634,17 +1632,6 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrcompiler) - shared->compiler.emplace(shared->path + "build/", 1); - - return *shared->compiler; -} - - void Context::initializeSystemLogs() { auto lock = getLock(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index dcc3fa9b3ee..2d583c3c353 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1654,7 +1654,6 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre Aggregator::Params params(header, keys, aggregates, overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, @@ -1721,7 +1720,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const Aggregator::Params params(header_before_aggregation, keys, aggregates, overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, @@ -1943,7 +1941,6 @@ void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificato Aggregator::Params params(header, keys, aggregates, false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, SettingUInt64(0), SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath(), settings.max_threads); @@ -1973,7 +1970,6 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPipeline & pipeline, Modif Aggregator::Params params(header_before_transform, keys, aggregates, false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - settings.compile ? 
&context.getCompiler() : nullptr, settings.min_count_to_compile, SettingUInt64(0), SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath(), settings.max_threads); diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h deleted file mode 100644 index 9a238c77032..00000000000 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ /dev/null @@ -1,215 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - - -/** An aggregation loop template that allows you to generate a custom variant for a specific combination of aggregate functions. - * It differs from the usual one in that calls to aggregate functions should be inlined, and the update loop of the aggregate functions should be unrolled. - * - * Since there are too many possible combinations, it is not possible to generate them all in advance. - * This template is intended to instantiate it in runtime, - * by running the compiler, compiling shared library, and using it with `dlopen`. - */ - - -struct AggregateFunctionsUpdater -{ - AggregateFunctionsUpdater( - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions_, - const Sizes & offsets_of_aggregate_states_, - Aggregator::AggregateColumns & aggregate_columns_, - AggregateDataPtr & value_, - size_t row_num_, - Arena * arena_) - : aggregate_functions(aggregate_functions_), - offsets_of_aggregate_states(offsets_of_aggregate_states_), - aggregate_columns(aggregate_columns_), - value(value_), row_num(row_num_), arena(arena_) - { - } - - template - void operator()() ALWAYS_INLINE; - - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions; - const Sizes & offsets_of_aggregate_states; - Aggregator::AggregateColumns & aggregate_columns; - AggregateDataPtr & value; - size_t row_num; - Arena * arena; -}; - -template -void AggregateFunctionsUpdater::operator()() -{ - static_cast(aggregate_functions[column_num])->add( - value + offsets_of_aggregate_states[column_num], - aggregate_columns[column_num].data(), - row_num, arena); -} - -struct AggregateFunctionsCreator -{ - AggregateFunctionsCreator( - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions_, - const Sizes & offsets_of_aggregate_states_, - AggregateDataPtr & aggregate_data_) - : aggregate_functions(aggregate_functions_), - offsets_of_aggregate_states(offsets_of_aggregate_states_), - aggregate_data(aggregate_data_) - { - } - - template - void operator()() ALWAYS_INLINE; - - const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions; - const Sizes & offsets_of_aggregate_states; - AggregateDataPtr & aggregate_data; -}; - -template -void AggregateFunctionsCreator::operator()() -{ - AggregateFunction * func = static_cast(aggregate_functions[column_num]); - - try - { - /** An exception may occur if there is a shortage of memory. - * To ensure that everything is properly destroyed, we "roll back" some of the created states. - * The code is not very convenient. - */ - func->create(aggregate_data + offsets_of_aggregate_states[column_num]); - } - catch (...) 
- { - for (size_t rollback_j = 0; rollback_j < column_num; ++rollback_j) - func->destroy(aggregate_data + offsets_of_aggregate_states[rollback_j]); - - throw; - } -} - - -template -void NO_INLINE Aggregator::executeSpecialized( - Method & method, - Arena * aggregates_pool, - size_t rows, - ColumnRawPtrs & key_columns, - AggregateColumns & aggregate_columns, - bool no_more_keys, - AggregateDataPtr overflow_row) const -{ - typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - - if (!no_more_keys) - executeSpecializedCase( - method, state, aggregates_pool, rows, aggregate_columns, overflow_row); - else - executeSpecializedCase( - method, state, aggregates_pool, rows, aggregate_columns, overflow_row); -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wuninitialized" - -template -void NO_INLINE Aggregator::executeSpecializedCase( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - size_t rows, - AggregateColumns & aggregate_columns, - AggregateDataPtr overflow_row) const -{ - /// For all rows. - for (size_t i = 0; i < rows; ++i) - { - AggregateDataPtr aggregate_data = nullptr; - - if (!no_more_keys) /// Insert. - { - auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); - - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - AggregateFunctionsList::forEach(AggregateFunctionsCreator( - aggregate_functions, offsets_of_aggregate_states, aggregate_data)); - - emplace_result.setMapped(aggregate_data); - } - else - aggregate_data = emplace_result.getMapped(); - } - else - { - /// Add only if the key already exists. - auto find_result = state.findKey(method.data, i, *aggregates_pool); - if (find_result.isFound()) - aggregate_data = find_result.getMapped(); - } - - /// If the key does not fit, and the data does not need to be aggregated in a separate row, then there's nothing to do. - if (!aggregate_data && !overflow_row) - continue; - - auto value = aggregate_data ? aggregate_data : overflow_row; - - /// Add values into the aggregate functions. - AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i, aggregates_pool)); - } -} - -#pragma GCC diagnostic pop - -template -void NO_INLINE Aggregator::executeSpecializedWithoutKey( - AggregatedDataWithoutKey & res, - size_t rows, - AggregateColumns & aggregate_columns, - Arena * arena) const -{ - for (size_t i = 0; i < rows; ++i) - { - AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, res, i, arena)); - } -} - -} - - -/** The main code is compiled with gcc 7. - * But SpecializedAggregator is compiled using clang 6 into the .so file. - * This is done because gcc can not get functions inlined, - * which were de-virtualized, in a particular case, and the performance is lower. - * And also it's easier to distribute clang for deploy to the servers. 
- * - * After switching from gcc 4.8 and gnu++1x to gcc 4.9 and gnu++1y (and then to gcc 5), - * an error occurred with `dlopen`: undefined symbol: __cxa_pure_virtual - * - * Most likely, this is due to the changed version of this symbol: - * gcc creates a symbol in .so - * U __cxa_pure_virtual@@CXXABI_1.3 - * but clang creates a symbol - * U __cxa_pure_virtual - * - * But it does not matter for us how the __cxa_pure_virtual function will be implemented, - * because it is not called during normal program execution, - * and if called - then the program is guaranteed buggy. - * - * Therefore, we can work around the problem this way - */ -extern "C" void __attribute__((__visibility__("default"), __noreturn__)) __cxa_pure_virtual() { abort(); } diff --git a/dbms/src/Interpreters/config_compile.h.in b/dbms/src/Interpreters/config_compile.h.in deleted file mode 100644 index e8db534a62d..00000000000 --- a/dbms/src/Interpreters/config_compile.h.in +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#cmakedefine CMAKE_LIBRARY_ARCHITECTURE "@CMAKE_LIBRARY_ARCHITECTURE@" -#cmakedefine PATH_SHARE "@PATH_SHARE@" -#cmakedefine INTERNAL_COMPILER_FLAGS "@INTERNAL_COMPILER_FLAGS@" -#cmakedefine INTERNAL_COMPILER_BIN_ROOT "@INTERNAL_COMPILER_BIN_ROOT@" -#cmakedefine INTERNAL_LINKER_EXECUTABLE "@INTERNAL_LINKER_EXECUTABLE@" -#cmakedefine INTERNAL_COMPILER_EXECUTABLE "@INTERNAL_COMPILER_EXECUTABLE@" -#cmakedefine INTERNAL_COMPILER_ENV "@INTERNAL_COMPILER_ENV@" -#if !defined(INTERNAL_COMPILER_ENV) -# define INTERNAL_COMPILER_ENV "" -#endif -#cmakedefine INTERNAL_COMPILER_HEADERS "@INTERNAL_COMPILER_HEADERS@" -#if !defined(INTERNAL_COMPILER_HEADERS) -# define INTERNAL_COMPILER_HEADERS "" -#endif -#cmakedefine INTERNAL_COMPILER_HEADERS_ROOT "@INTERNAL_COMPILER_HEADERS_ROOT@" -#if !defined(INTERNAL_COMPILER_HEADERS_ROOT) -# define INTERNAL_COMPILER_HEADERS_ROOT "" -#endif - -#cmakedefine01 INTERNAL_COMPILER_CUSTOM_ROOT -#cmakedefine INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR "@INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR@" -#cmakedefine INTERNAL_Poco_Foundation_INCLUDE_DIR "@INTERNAL_Poco_Foundation_INCLUDE_DIR@" -#cmakedefine INTERNAL_Poco_Util_INCLUDE_DIR "@INTERNAL_Poco_Util_INCLUDE_DIR@" -#cmakedefine INTERNAL_Boost_INCLUDE_DIRS "@INTERNAL_Boost_INCLUDE_DIRS@" diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index b4f9fff1d36..3fac5424c00 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -41,9 +41,6 @@ add_executable (two_level_hash_map two_level_hash_map.cpp) target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) target_link_libraries (two_level_hash_map PRIVATE dbms) -add_executable (compiler_test compiler_test.cpp) -target_link_libraries (compiler_test PRIVATE dbms) - add_executable (logical_expressions_optimizer logical_expressions_optimizer.cpp) target_link_libraries (logical_expressions_optimizer PRIVATE dbms clickhouse_parsers) diff --git a/dbms/src/Interpreters/tests/compiler_test.cpp b/dbms/src/Interpreters/tests/compiler_test.cpp deleted file mode 100644 index c56cf5775d6..00000000000 --- a/dbms/src/Interpreters/tests/compiler_test.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include - -#include - - -int main(int, char **) -{ - using namespace DB; - - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Logger::root().setChannel(channel); - Logger::root().setLevel("trace"); - - /// Check exception handling and catching - try - { - Compiler compiler(".", 
1); - - auto lib = compiler.getOrCount("catch_me_if_you_can", 0, "", []() -> std::string - { - return - "#include \n" - "void f() __attribute__((__visibility__(\"default\")));\n" - "void f()" - "{" - "try { throw std::runtime_error(\"Catch me if you can\"); }" - "catch (const std::runtime_error & e) { std::cout << \"Caught in .so: \" << e.what() << std::endl; throw; }\n" - "}" - ; - }, [](SharedLibraryPtr &){}); - - auto f = lib->template get("_Z1fv"); - - try - { - f(); - } - catch (const std::exception & e) - { - std::cout << "Caught in main(): " << e.what() << "\n"; - return 0; - } - catch (...) - { - std::cout << "Unknown exception\n"; - return -1; - } - } - catch (...) - { - std::cerr << getCurrentExceptionMessage(true) << "\n"; - return -1; - } - - return 0; -} diff --git a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference b/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference deleted file mode 100644 index 207dc069e43..00000000000 --- a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 Hello -2 Hello diff --git a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql b/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql deleted file mode 100644 index 5902b94b753..00000000000 --- a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.sql +++ /dev/null @@ -1,2 +0,0 @@ -SET compile = 1, min_count_to_compile = 0, max_threads = 1, send_logs_level = 'none'; -SELECT arrayJoin([1, 2, 1]) AS UserID, argMax('Hello', today()) AS res GROUP BY UserID; diff --git a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference b/dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference deleted file mode 100644 index 6ed281c757a..00000000000 --- a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -1 diff --git a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh b/dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh deleted file mode 100755 index fbf5efcda2c..00000000000 --- a/dbms/tests/queries/0_stateless/00568_compile_catch_throw.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. $CURDIR/../shell_config.sh - -SETTINGS="--compile=1 --min_count_to_compile=0 --max_threads=1 --max_memory_usage=8000000 --server_logs_file=/dev/null" -output=$($CLICKHOUSE_CLIENT -q "SELECT length(groupArray(number)) FROM (SELECT * FROM system.numbers LIMIT 1000000)" $SETTINGS 2>&1) - -[[ $? -eq 0 ]] && echo "Expected non-zero RC" -if ! echo "$output" | grep -Fc -e 'Memory limit (for query) exceeded' -e 'Cannot compile code' ; then - echo -e 'There is no expected exception "Memory limit (for query) exceeded: would use..." 
or "Cannot compile code..."' "Whereas got:\n$output" -fi - -$CLICKHOUSE_CLIENT -q "SELECT 1" From cc0157b29378cc6af380976850a75d9c026f768c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Aug 2019 02:07:05 +0300 Subject: [PATCH 47/59] Added a test --- dbms/src/Functions/trim.cpp | 2 +- .../queries/0_stateless/00997_trim.reference | 0 dbms/tests/queries/0_stateless/00997_trim.sql | 20 +++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00997_trim.reference create mode 100644 dbms/tests/queries/0_stateless/00997_trim.sql diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index 81916604d63..46f69530005 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -85,7 +85,7 @@ private: char_data += num_chars; } - if constexpr (mode::trim_left) + if constexpr (mode::trim_right) { const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end); if (found) diff --git a/dbms/tests/queries/0_stateless/00997_trim.reference b/dbms/tests/queries/0_stateless/00997_trim.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00997_trim.sql b/dbms/tests/queries/0_stateless/00997_trim.sql new file mode 100644 index 00000000000..7519877ec5e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_trim.sql @@ -0,0 +1,20 @@ +WITH + '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' AS x, + replaceRegexpAll(x, '.', ' ') AS spaces, + concat(substring(spaces, 1, rand(1) % 62), substring(x, 1, rand(2) % 62), substring(spaces, 1, rand(3) % 62)) AS s, + trimLeft(s) AS sl, + trimRight(s) AS sr, + trimBoth(s) AS t, + replaceRegexpOne(s, '^ +', '') AS slr, + replaceRegexpOne(s, ' +$', '') AS srr, + replaceRegexpOne(s, '^ *(.*?) *$', '\\1') AS tr +SELECT + replaceAll(s, ' ', '_'), + replaceAll(sl, ' ', '_'), + replaceAll(slr, ' ', '_'), + replaceAll(sr, ' ', '_'), + replaceAll(srr, ' ', '_'), + replaceAll(t, ' ', '_'), + replaceAll(tr, ' ', '_') +FROM numbers(100000) +WHERE NOT ((sl = slr) AND (sr = srr) AND (t = tr)) From 7703d321138c2854142f10709cb6451e4b5a024e Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 24 Aug 2019 13:53:22 +0800 Subject: [PATCH 48/59] remove symlinks --- docs/zh/database_engines/index.md | 1 - docs/zh/database_engines/mysql.md | 1 - 2 files changed, 2 deletions(-) delete mode 120000 docs/zh/database_engines/index.md delete mode 120000 docs/zh/database_engines/mysql.md diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md deleted file mode 120000 index bbdb762a4ad..00000000000 --- a/docs/zh/database_engines/index.md +++ /dev/null @@ -1 +0,0 @@ -../../en/database_engines/index.md \ No newline at end of file diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md deleted file mode 120000 index 51ac4126e2d..00000000000 --- a/docs/zh/database_engines/mysql.md +++ /dev/null @@ -1 +0,0 @@ -../../en/database_engines/mysql.md \ No newline at end of file From 7d7c13632cb3c400e3920f06493d888da3b654e0 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 24 Aug 2019 13:54:01 +0800 Subject: [PATCH 49/59] Translate database engine documentation, update table engine documentation. 
--- docs/zh/database_engines/index.md | 11 +++ docs/zh/database_engines/mysql.md | 124 ++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 docs/zh/database_engines/index.md create mode 100644 docs/zh/database_engines/mysql.md diff --git a/docs/zh/database_engines/index.md b/docs/zh/database_engines/index.md new file mode 100644 index 00000000000..f8ae05e2520 --- /dev/null +++ b/docs/zh/database_engines/index.md @@ -0,0 +1,11 @@ +# 数据库引擎 + +您使用的所有表都是由数据库引擎所提供的 + +默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../operations/table_engines/index.md)和[所有支持的SQL语法](../query_language/syntax.md). + +除此之外,您还可以选择使用以下的数据库引擎: + +- [MySQL](mysql.md) + +[来源文章](https://clickhouse.yandex/docs/en/database_engines/) diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/database_engines/mysql.md new file mode 100644 index 00000000000..38dfcb5ef64 --- /dev/null +++ b/docs/zh/database_engines/mysql.md @@ -0,0 +1,124 @@ +# MySQL + +MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并允许您对表进行`INSERT`和`SELECT`查询,以方便您在ClickHouse与MySQL之间进行数据交换。 + +`MySQL`数据库引擎会将对其的查询转换为MySQL语法并发送到MySQL服务器中,因此您可以执行诸如`SHOW TABLES`或`SHOW CREATE TABLE`之类的操作。 + +但您无法对其执行以下操作: + +- `ATTACH`/`DETACH` +- `DROP` +- `RENAME` +- `CREATE TABLE` +- `ALTER` + + +## CREATE DATABASE + +``` sql +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] +ENGINE = MySQL('host:port', 'database', 'user', 'password') +``` + +**MySQL数据库引擎参数** + +- `host:port` — 链接的MySQL地址。 +- `database` — 链接的MySQL数据库。 +- `user` — 链接的MySQL用户。 +- `password` — 链接的MySQL用户密码。 + + +## 支持的类型对应 + +MySQL | ClickHouse +------|------------ +UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) +TINYINT | [Int8](../data_types/int_uint.md) +UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) +SMALLINT | [Int16](../data_types/int_uint.md) +UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) +INT, MEDIUMINT | [Int32](../data_types/int_uint.md) +UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) +BIGINT | [Int64](../data_types/int_uint.md) +FLOAT | [Float32](../data_types/float.md) +DOUBLE | [Float64](../data_types/float.md) +DATE | [Date](../data_types/date.md) +DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) +BINARY | [FixedString](../data_types/fixedstring.md) + +其他的MySQL数据类型将全部都转换为[String](../data_types/string.md)。 + +同时以上的所有类型都支持[Nullable](../data_types/nullable.md)。 + + +## 使用示例 + +在MySQL中创建表: + +``` +mysql> USE test; +Database changed + +mysql> CREATE TABLE `mysql_table` ( + -> `int_id` INT NOT NULL AUTO_INCREMENT, + -> `float` FLOAT NOT NULL, + -> PRIMARY KEY (`int_id`)); +Query OK, 0 rows affected (0,09 sec) + +mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2); +Query OK, 1 row affected (0,00 sec) + +mysql> select * from mysql_table; ++--------+-------+ +| int_id | value | ++--------+-------+ +| 1 | 2 | ++--------+-------+ +1 row in set (0,00 sec) +``` + +在ClickHouse中创建MySQL类型的数据库,同时与MySQL服务器交换数据: + +```sql +CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password') +``` +```sql +SHOW DATABASES +``` +```text +┌─name─────┐ +│ default │ +│ mysql_db │ +│ system │ +└──────────┘ +``` +```sql +SHOW TABLES FROM mysql_db +``` +```text +┌─name─────────┐ +│ mysql_table │ +└──────────────┘ +``` +```sql +SELECT * FROM mysql_db.mysql_table +``` +```text +┌─int_id─┬─value─┐ +│ 1 │ 2 │ +└────────┴───────┘ +``` +```sql +INSERT INTO mysql_db.mysql_table VALUES (3,4) +``` +```sql +SELECT * FROM mysql_db.mysql_table +``` +```text +┌─int_id─┬─value─┐ +│ 1 │ 2 │ +│ 3 │ 4 │ +└────────┴───────┘ 
+``` + +[来源文章](https://clickhouse.yandex/docs/en/database_engines/mysql/) From 2464dd0b9fc73deb1d265b16f751e884e291af08 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 24 Aug 2019 11:51:02 +0300 Subject: [PATCH 50/59] fix --- .../queries/0_stateless/00943_materialize_index.sh | 6 +++--- .../0_stateless/00944_clear_index_in_partition.sh | 2 +- .../00975_indices_mutation_replicated_zookeeper.sh | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00943_materialize_index.sh b/dbms/tests/queries/0_stateless/00943_materialize_index.sh index feab59b368e..bc59b41b005 100755 --- a/dbms/tests/queries/0_stateless/00943_materialize_index.sh +++ b/dbms/tests/queries/0_stateless/00943_materialize_index.sh @@ -39,13 +39,13 @@ SET allow_experimental_data_skipping_indices=1; ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_2.txt" "test" +wait_for_mutation "minmax_idx" "mutation_3.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 2;" -wait_for_mutation "minmax_idx" "mutation_3.txt" "test" +wait_for_mutation "minmax_idx" "mutation_4.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" @@ -58,7 +58,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx;" -wait_for_mutation "minmax_idx" "mutation_4.txt" "test" +wait_for_mutation "minmax_idx" "mutation_5.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 5a7bdd8e3ae..74f15e63545 100755 --- a/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/dbms/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -43,7 +43,7 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" -wait_for_mutation "minmax_idx" "mutation_2.txt" "test" +wait_for_mutation "minmax_idx" "mutation_3.txt" "test" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" diff --git a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh index 5e6159475f8..765dfb6abe5 100755 --- a/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh +++ 
b/dbms/tests/queries/0_stateless/00975_indices_mutation_replicated_zookeeper.sh
@@ -45,20 +45,20 @@
$CLICKHOUSE_CLIENT --query="INSERT INTO test.indices_mutaions1 VALUES (9, 1, 2)"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;"
-$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
+$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;"
-sleep 0.5
+sleep 1
$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;"
-$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
+$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE test.indices_mutaions1 MATERIALIZE INDEX idx IN PARTITION 1;"
-wait_for_mutation "indices_mutaions1" "mutation_2.txt" "test"
-wait_for_mutation "indices_mutaions2" "mutation_2.txt" "test"
+wait_for_mutation "indices_mutaions1" "0000000000" "test"
+wait_for_mutation "indices_mutaions2" "0000000000" "test"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2;"
-$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
+$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="DROP TABLE test.indices_mutaions1"
$CLICKHOUSE_CLIENT --query="DROP TABLE test.indices_mutaions2"

From b7fdfcc7976bc31a822c98fc8d9219f626c54817 Mon Sep 17 00:00:00 2001
From: Weiqing Xu
Date: Sat, 24 Aug 2019 17:27:36 +0800
Subject: [PATCH 51/59] Fix HDFS HA not working in DEBUG mode

Describe the bug: when using an HDFS HA nameservice as the URI, the port will be 0.
hdfsBuilderSetNameNodePort will be called to set the port, and it calls assert to
check that the port is greater than 0. So in Release mode it works OK, but in Debug
mode the assert fails.
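A minimal sketch of the failure mode and of the guard this patch applies. The `hdfsBuilderSetNameNodePort` below is a hand-written stand-in that only models the debug-time assert described above (it is not the real libhdfs3 function), and `setNameNodePort` is a hypothetical helper used purely for illustration:

```cpp
#include <cassert>
#include <cstdint>

/// Stand-in for the libhdfs3 setter: in a Debug build it asserts that the port is positive.
/// (Only a model of the observed behaviour, not the real API.)
static void hdfsBuilderSetNameNodePort(void * /*builder*/, uint16_t port)
{
    assert(port > 0);
    (void)port;
}

/// Hypothetical helper mirroring the patched logic: HA nameservice URIs carry no port,
/// so the URI parser reports 0 and the call must be skipped entirely.
static void setNameNodePort(void * builder, uint16_t port)
{
    if (port != 0)
        hdfsBuilderSetNameNodePort(builder, port);
}

int main()
{
    setNameNodePort(nullptr, 0);     /// HA nameservice case: the assert is never reached.
    setNameNodePort(nullptr, 8020);  /// An explicit port is still forwarded.
    return 0;
}
```

The patch below applies the same `port != 0` guard directly in createHDFSBuilder() in HDFSCommon.cpp.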
How to reproduce: compile ClickHouse in DEBUG mode; it will throw an error when an
HDFS HA nameservice URL is used.
---
 dbms/src/IO/HDFSCommon.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/IO/HDFSCommon.cpp b/dbms/src/IO/HDFSCommon.cpp
index 0f1a58942d6..a94fbeabd60 100644
--- a/dbms/src/IO/HDFSCommon.cpp
+++ b/dbms/src/IO/HDFSCommon.cpp
@@ -40,7 +40,10 @@ HDFSBuilderPtr createHDFSBuilder(const Poco::URI & uri)
hdfsBuilderSetUserName(builder.get(), user.c_str());
}
hdfsBuilderSetNameNode(builder.get(), host.c_str());
- hdfsBuilderSetNameNodePort(builder.get(), port);
+ if (port != 0)
+ {
+ hdfsBuilderSetNameNodePort(builder.get(), port);
+ }
return builder;
}

From 720bb3ac08f74dbc7fd42738d8835562973d1671 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 24 Aug 2019 02:07:05 +0300
Subject: [PATCH 52/59] Added a test
---
 libs/libcommon/include/common/find_symbols.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h
index 920a7df04c5..162c73251fa 100644
--- a/libs/libcommon/include/common/find_symbols.h
+++ b/libs/libcommon/include/common/find_symbols.h
@@ -17,7 +17,7 @@
 * but with the following differencies:
 * - works with any memory ranges, including containing zero bytes;
 * - doesn't require terminating zero byte: end of memory range is passed explicitly;
- * - if not found, returns pointer to end instead of NULL;
+ * - if not found, returns pointer to end instead of nullptr;
 * - maximum number of symbols to search is 16.
 *
 * Uses SSE 2 in case of small number of symbols for search and SSE 4.2 in the case of large number of symbols,
@@ -188,6 +188,7 @@ inline const char * find_first_symbols_sse42_impl(const char * const begin, cons
 || (num_chars >= 11 && maybe_negate(*pos == c11))
 || (num_chars >= 12 && maybe_negate(*pos == c12))
 || (num_chars >= 13 && maybe_negate(*pos == c13))
+ || (num_chars >= 14 && maybe_negate(*pos == c14))
 || (num_chars >= 15 && maybe_negate(*pos == c15))
 || (num_chars >= 16 && maybe_negate(*pos == c16)))
 return pos;

From 60fde1d29eb4f33a1abdc4eb273c278cf6a83f15 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 24 Aug 2019 15:03:54 +0300
Subject: [PATCH 53/59] Removed useless statements from debian directory
---
 debian/clickhouse-common-static.install | 1 -
 debian/clickhouse-server.docs | 1 +
 debian/clickhouse-server.install | 2 --
 debian/rules | 16 ----------------
 4 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/debian/clickhouse-common-static.install b/debian/clickhouse-common-static.install
index 6666b090272..81b1dc4eb1b 100644
--- a/debian/clickhouse-common-static.install
+++ b/debian/clickhouse-common-static.install
@@ -1,4 +1,3 @@
usr/bin/clickhouse
usr/bin/clickhouse-odbc-bridge
etc/security/limits.d/clickhouse.conf
-usr/share/clickhouse/*
diff --git a/debian/clickhouse-server.docs b/debian/clickhouse-server.docs
index 95969d08c43..e12d6533be2 100644
--- a/debian/clickhouse-server.docs
+++ b/debian/clickhouse-server.docs
@@ -1,3 +1,4 @@
LICENSE
AUTHORS
README.md
+CHANGELOG.md
diff --git a/debian/clickhouse-server.install b/debian/clickhouse-server.install
index f69969a6084..b1475fdf162 100644
--- a/debian/clickhouse-server.install
+++ b/debian/clickhouse-server.install
@@ -1,6 +1,4 @@
usr/bin/clickhouse-server
-usr/bin/clickhouse-clang
-usr/bin/clickhouse-lld
usr/bin/clickhouse-copier
usr/bin/clickhouse-report
etc/clickhouse-server/config.xml
diff --git a/debian/rules b/debian/rules
index a49ffc3f66e..c21f0999bbc 100755 --- a/debian/rules +++ b/debian/rules @@ -32,11 +32,6 @@ endif CMAKE_FLAGS += -DENABLE_UTILS=0 -#DEB_CLANG ?= $(shell which clang-6.0 || which clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8) - -#DEB_CC ?= gcc-7 -#DEB_CXX ?= g++-7 - ifdef DEB_CXX DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) DEB_HOST_GNU_TYPE := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) @@ -88,12 +83,8 @@ override_dh_auto_configure: override_dh_auto_build: # Fix for ninja. Do not add -O. $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) -# #cd $(BUILDDIR) && cmake --build . -- -j$(THREADS_COUNT) # cmake return true on error override_dh_auto_test: -# #TODO, use ENABLE_TESTS=1 -# #./debian/tests_wrapper.sh -# cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -R GLIBC_required_version cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server override_dh_clean: @@ -117,11 +108,6 @@ override_dh_install: mkdir -p $(DESTDIR)/etc/systemd/system/ cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/ - # fake metrika files when private dir is empty - mkdir -p $(DESTDIR)/etc/clickhouse-server/metrika - touch $(DESTDIR)/etc/clickhouse-server/metrika/config.xml - touch $(DESTDIR)/etc/clickhouse-server/metrika/users.xml - dh_install --list-missing --sourcedir=$(DESTDIR) override_dh_auto_install: @@ -130,7 +116,5 @@ override_dh_auto_install: override_dh_shlibdeps: true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency. -#TODO: faster packing of non-release builds: ifdef RELEASE_COMPATIBLE override_dh_builddeb: dh_builddeb -- -Z gzip # Older systems don't have "xz", so use "gzip" instead. -#TODO: endif From cd620d2de517acff4943443a25fac71ac063b068 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Aug 2019 15:18:03 +0300 Subject: [PATCH 54/59] Fixed race condition in test (once again) --- .../queries/0_stateless/00600_replace_running_query.reference | 1 - dbms/tests/queries/0_stateless/00600_replace_running_query.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference index 804267a1c11..a01672aae85 100644 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference @@ -2,6 +2,5 @@ 1 1 1 -finished 42 readonly SELECT 2, count() FROM system.numbers 1 44 diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 513f6d8440e..dbbf41dd772 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -30,7 +30,7 @@ ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' 2>&1 | grep -cF 'is alrea # Trying to replace query of a different user $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user' -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null wait ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & From 2dbfabd08c839a54a27ed99dead67b71ed8660ff Mon Sep 17 00:00:00 2001 From: Nikita Vasilev 
<31595000+nikvas0@users.noreply.github.com> Date: Sun, 25 Aug 2019 01:01:36 +0300 Subject: [PATCH 55/59] fix Set index check useless (#6651) * fixed useless detection * fixed useless detection * fix * fix * fix --- .../Storages/MergeTree/MergeTreeIndexSet.cpp | 25 ++----------------- .../00997_set_index_array.reference | 1 + .../0_stateless/00997_set_index_array.sql | 24 ++++++++++++++++++ 3 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00997_set_index_array.reference create mode 100644 dbms/tests/queries/0_stateless/00997_set_index_array.sql diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 40aba822353..954ac774583 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -405,25 +405,6 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const return true; } -static bool checkAtomName(const String & name) -{ - static std::set atoms = { - "notEquals", - "equals", - "less", - "greater", - "lessOrEquals", - "greaterOrEquals", - "in", - "notIn", - "like", - "startsWith", - "endsWith", - "multiSearchAny" - }; - return atoms.find(name) != atoms.end(); -} - bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic) const { if (const auto * func = node->as()) @@ -439,16 +420,14 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic return checkASTUseless(args[0], atomic) || checkASTUseless(args[1], atomic); else if (func->name == "not") return checkASTUseless(args[0], atomic); - else if (!atomic && checkAtomName(func->name)) - return checkASTUseless(node, true); else return std::any_of(args.begin(), args.end(), - [this, &atomic](const auto & arg) { return checkASTUseless(arg, atomic); }); + [this](const auto & arg) { return checkASTUseless(arg, true); }); } else if (const auto * literal = node->as()) return !atomic && literal->value.get(); else if (const auto * identifier = node->as()) - return key_columns.find(identifier->getColumnName()) == key_columns.end(); + return key_columns.find(identifier->getColumnName()) == std::end(key_columns); else return true; } diff --git a/dbms/tests/queries/0_stateless/00997_set_index_array.reference b/dbms/tests/queries/0_stateless/00997_set_index_array.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_set_index_array.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00997_set_index_array.sql b/dbms/tests/queries/0_stateless/00997_set_index_array.sql new file mode 100644 index 00000000000..c57507ce22d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00997_set_index_array.sql @@ -0,0 +1,24 @@ +SET allow_experimental_data_skipping_indices = 1; + +DROP TABLE IF EXISTS test.set_array; + +CREATE TABLE test.set_array +( + primary_key String, + index_array Array(UInt64), + INDEX additional_index_array (index_array) TYPE set(10000) GRANULARITY 1 +) ENGINE = MergeTree() +ORDER BY (primary_key); + +INSERT INTO test.set_array +select + toString(intDiv(number, 1000000)) as primary_key, + array(number) as index_array +from system.numbers +limit 10000000; + +SET max_rows_to_read = 8192; + +select count() from test.set_array where has(index_array, 333); + +DROP TABLE test.set_array; \ No newline at end of file From 7144a3f827a34a30f50c7818681c9fe21d40ac93 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Aug 2019 01:06:13 
+0300 Subject: [PATCH 56/59] Speed up MemoryTracker by function inlining --- dbms/src/Common/CurrentThread.cpp | 6 ------ dbms/src/Common/CurrentThread.h | 7 ++++++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index 446772f218d..ca39bec414c 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -51,12 +51,6 @@ MemoryTracker * CurrentThread::getMemoryTracker() return ¤t_thread->memory_tracker; } -Int64 & CurrentThread::getUntrackedMemory() -{ - /// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker() - return current_thread->untracked_memory; -} - void CurrentThread::updateProgressIn(const Progress & value) { if (unlikely(!current_thread)) diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 01e46fbeadc..1e0140c6330 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -52,7 +52,12 @@ public: static ProfileEvents::Counters & getProfileEvents(); static MemoryTracker * getMemoryTracker(); - static Int64 & getUntrackedMemory(); + + static inline Int64 & getUntrackedMemory() + { + /// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker() + return current_thread->untracked_memory; + } /// Update read and write rows (bytes) statistics (used in system.query_thread_log) static void updateProgressIn(const Progress & value); From 6d3250867c1151ebeb8fb3a078fb73a6e1f6509a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Aug 2019 01:08:45 +0300 Subject: [PATCH 57/59] Fixed build of tests --- dbms/src/Interpreters/tests/aggregate.cpp | 2 +- dbms/src/Processors/tests/processors_test_aggregation.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/tests/aggregate.cpp b/dbms/src/Interpreters/tests/aggregate.cpp index e0068e9a56b..73e71d178ea 100644 --- a/dbms/src/Interpreters/tests/aggregate.cpp +++ b/dbms/src/Interpreters/tests/aggregate.cpp @@ -79,7 +79,7 @@ int main(int argc, char ** argv) Aggregator::Params params( stream->getHeader(), {0, 1}, aggregate_descriptions, - false, 0, OverflowMode::THROW, nullptr, 0, 0, 0, 0, false, "", 1); + false, 0, OverflowMode::THROW, 0, 0, 0, false, "", 1); Aggregator aggregator(params); diff --git a/dbms/src/Processors/tests/processors_test_aggregation.cpp b/dbms/src/Processors/tests/processors_test_aggregation.cpp index a645804eba8..2306de4edc0 100644 --- a/dbms/src/Processors/tests/processors_test_aggregation.cpp +++ b/dbms/src/Processors/tests/processors_test_aggregation.cpp @@ -224,8 +224,6 @@ try overflow_row, max_rows_to_group_by, OverflowMode::THROW, - nullptr, /// No compiler - 0, /// min_count_to_compile group_by_two_level_threshold, group_by_two_level_threshold_bytes, max_bytes_before_external_group_by, @@ -298,8 +296,6 @@ try overflow_row, max_rows_to_group_by, OverflowMode::THROW, - nullptr, /// No compiler - 0, /// min_count_to_compile group_by_two_level_threshold, group_by_two_level_threshold_bytes, max_bytes_before_external_group_by, From 8047aab684477c0a3bf7f78e30bdb954b011433f Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 26 Aug 2019 13:13:56 +0300 Subject: [PATCH 58/59] Add const --- dbms/src/Core/SettingsCommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/SettingsCommon.h b/dbms/src/Core/SettingsCommon.h index 8fc557f1768..b8c56d50caa 100644 --- a/dbms/src/Core/SettingsCommon.h +++ 
b/dbms/src/Core/SettingsCommon.h @@ -325,7 +325,7 @@ private: /// Can be updated after first load for config/definition. /// Non updatable settings can be `changed`, /// if they were overwritten in config/definition. - bool updateable; + const bool updateable; GetStringFunction get_string; GetFieldFunction get_field; SetStringFunction set_string; From c2b6cffabc709afd1862ceb7348564d54c9f74a4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 26 Aug 2019 15:39:35 +0300 Subject: [PATCH 59/59] Update IStorage.cpp --- dbms/src/Storages/IStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index eb9b4adc4f4..855336c06e5 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -399,7 +399,7 @@ void IStorage::alterSettings( { if (hasSetting(change.name)) { - auto finder = [&change] (const SettingChange & c) { return c.name == change.name;}; + auto finder = [&change] (const SettingChange & c) { return c.name == change.name; }; if (auto it = std::find_if(storage_changes.begin(), storage_changes.end(), finder); it != storage_changes.end()) it->value = change.value; else