From 25ecc386154cc87703c6014bb44e3a42eff6b507 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 Dec 2019 19:44:50 +0300 Subject: [PATCH 1/9] More correct separation between metadata and data alters for non replicated MergeTree. --- dbms/src/Storages/AlterCommands.cpp | 27 +++-- dbms/src/Storages/AlterCommands.h | 11 ++- dbms/src/Storages/IStorage.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 20 ++-- dbms/src/Storages/StorageMergeTree.cpp | 98 ++++++++++--------- .../Storages/StorageReplicatedMergeTree.cpp | 4 + 6 files changed, 86 insertions(+), 76 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index bcfd852a628..b24863a2afb 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -109,7 +109,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = ast_col_decl.comment->as(); - command.comment = ast_comment.value.get(); + command.comment.emplace(ast_comment.value.get()); } if (ast_col_decl.ttl) @@ -225,7 +225,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri column.default_desc.kind = default_kind; column.default_desc.expression = default_expression; } - column.comment = comment; + column.comment = *comment; column.codec = codec; column.ttl = ttl; @@ -251,11 +251,8 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri column.codec = codec; } - if (!isMutable()) - { - column.comment = comment; - return; - } + if (comment) + column.comment = *comment; if (ttl) column.ttl = ttl; @@ -279,7 +276,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri } else if (type == COMMENT_COLUMN) { - columns_description.modify(column_name, [&](ColumnDescription & column) { column.comment = comment; }); + columns_description.modify(column_name, [&](ColumnDescription & column) { column.comment = *comment; }); } else if (type == ADD_INDEX) { @@ -390,13 +387,15 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } -bool AlterCommand::isMutable() const +bool AlterCommand::isModifyingData() const { - if (type == COMMENT_COLUMN || type == MODIFY_SETTING) - return false; + /// Change binary representation on disk if (type == MODIFY_COLUMN) return data_type.get() || default_expression; - return true; + + return type == ADD_COLUMN /// We need to change columns.txt in each part + || type == DROP_COLUMN /// We need to change columns.txt in each part + || type == DROP_INDEX; /// We need to remove file from filesystem } bool AlterCommand::isSettingsAlter() const @@ -666,11 +665,11 @@ void AlterCommands::applyForSettingsOnly(SettingsChanges & changes) const changes = std::move(out_changes); } -bool AlterCommands::isMutable() const +bool AlterCommands::isModifyingData() const { for (const auto & param : *this) { - if (param.isMutable()) + if (param.isModifyingData()) return true; } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 67fc166067b..1217d96dc29 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -47,7 +47,7 @@ struct AlterCommand ColumnDefaultKind default_kind{}; ASTPtr default_expression{}; - String comment; + std::optional comment; /// For ADD - after which column to add a new one. If an empty string, add to the end. 
To add to the beginning now it is impossible. String after_column; @@ -102,8 +102,11 @@ struct AlterCommand ConstraintsDescription & constraints_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast, SettingsChanges & changes) const; - /// Checks that not only metadata touched by that command - bool isMutable() const; + /// Checks that alter query changes data. For MergeTree: + /// * column files (data and marks) + /// * each part meta (columns.txt) + /// in each part on disk (it's not lightweight alter). + bool isModifyingData() const; /// checks that only settings changed by alter bool isSettingsAlter() const; @@ -124,7 +127,7 @@ public: void applyForSettingsOnly(SettingsChanges & changes) const; void validate(const IStorage & table, const Context & context); - bool isMutable() const; + bool isModifyingData() const; bool isSettingsAlter() const; }; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index f5c34587fb2..169117f7b44 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -402,7 +402,7 @@ void IStorage::alter( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - if (params.isMutable()) + if (params.isModifyingData()) throw Exception("Method alter supports only change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); const String database_name = getDatabaseName(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 2039c71b04b..8d892fd69d6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1386,7 +1386,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c for (const AlterCommand & command : commands) { - if (!command.isMutable()) + if (!command.isModifyingData()) { continue; } @@ -1433,9 +1433,9 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c getIndices().indices, new_indices.indices, unused_expression, unused_map, unused_bool); } -void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, - const IndicesASTs & old_indices, const IndicesASTs & new_indices, ExpressionActionsPtr & out_expression, - NameToNameMap & out_rename_map, bool & out_force_update_metadata) const +void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, + const NamesAndTypesList & new_columns, const IndicesASTs & old_indices, const IndicesASTs & new_indices, + ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const { const auto settings = getSettings(); out_expression = nullptr; @@ -1457,7 +1457,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name /// Remove old indices - std::set new_indices_set; + std::unordered_set new_indices_set; for (const auto & index_decl : new_indices) new_indices_set.emplace(index_decl->as().name); for (const auto & index_decl : old_indices) @@ -1465,8 +1465,8 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name const auto & index = index_decl->as(); if (!new_indices_set.count(index.name)) { - out_rename_map["skp_idx_" + index.name + ".idx"] = ""; - out_rename_map["skp_idx_" + index.name + part_mrk_file_extension] = ""; + out_rename_map["skp_idx_" + index.name + ".idx"] = ""; /// drop this file + 
out_rename_map["skp_idx_" + index.name + part_mrk_file_extension] = ""; /// and this one } } @@ -1494,8 +1494,8 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name /// Delete files if they are no longer shared with another column. if (--stream_counts[file_name] == 0) { - out_rename_map[file_name + ".bin"] = ""; - out_rename_map[file_name + part_mrk_file_extension] = ""; + out_rename_map[file_name + ".bin"] = ""; /// drop this file + out_rename_map[file_name + part_mrk_file_extension] = ""; /// and this one } }, {}); } @@ -1847,7 +1847,7 @@ void MergeTreeData::AlterDataPartTransaction::commit() mutable_part.checksums = new_checksums; mutable_part.columns = new_columns; - /// 3) Delete the old files. + /// 3) Delete the old files and drop required columns (DROP COLUMN) for (const auto & from_to : rename_map) { String name = from_to.second.empty() ? from_to.first : from_to.second; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 41c9335de1e..e6a847ba1bd 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -249,34 +249,10 @@ void StorageMergeTree::alter( const String current_database_name = getDatabaseName(); const String current_table_name = getTableName(); - if (!params.isMutable()) - { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); - auto new_columns = getColumns(); - auto new_indices = getIndices(); - auto new_constraints = getConstraints(); - ASTPtr new_order_by_ast = order_by_ast; - ASTPtr new_primary_key_ast = primary_key_ast; - ASTPtr new_ttl_table_ast = ttl_table_ast; - SettingsChanges new_changes; - - params.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); - - changeSettings(new_changes, table_lock_holder); - - IDatabase::ASTModifier settings_modifier = getSettingsModifier(new_changes); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, settings_modifier); - setColumns(std::move(new_columns)); - return; - } - - /// NOTE: Here, as in ReplicatedMergeTree, you can do ALTER which does not block the writing of data for a long time. 
- /// Also block moves, because they can replace part with old state - auto merge_blocker = merger_mutator.merges_blocker.cancel(); - auto moves_blocked = parts_mover.moves_blocker.cancel(); - lockNewDataStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + checkAlter(params, context); + auto new_columns = getColumns(); auto new_indices = getIndices(); auto new_constraints = getConstraints(); @@ -284,13 +260,11 @@ void StorageMergeTree::alter( ASTPtr new_primary_key_ast = primary_key_ast; ASTPtr new_ttl_table_ast = ttl_table_ast; SettingsChanges new_changes; + params.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); - auto transactions = prepareAlterTransactions(new_columns, new_indices, context); - - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); - - IDatabase::ASTModifier storage_modifier = [&] (IAST & ast) + /// Modifier for storage AST in /metadata/storage_db/storage.sql + IDatabase::ASTModifier storage_modifier = [&](IAST & ast) { auto & storage_ast = ast.as(); @@ -310,24 +284,54 @@ void StorageMergeTree::alter( } }; - changeSettings(new_changes, table_lock_holder); - - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, storage_modifier); - - - /// Reinitialize primary key because primary key column types might have changed. - setProperties(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); - - setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast); - - for (auto & transaction : transactions) + /// Update metdata in memory + auto update_metadata = [&]() { - transaction->commit(); - transaction.reset(); - } + changeSettings(new_changes, table_lock_holder); + /// Reinitialize primary key because primary key column types might have changed. + setProperties(new_order_by_ast, new_primary_key_ast, new_columns, new_indices, new_constraints); - /// Columns sizes could be changed - recalculateColumnSizes(); + setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast); + }; + + /// This alter can be performed at metadata level only + if (!params.isModifyingData()) + { + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + + params.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); + + IDatabase::ASTModifier settings_modifier = getSettingsModifier(new_changes); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, storage_modifier); + + update_metadata(); + } + else + { + + /// NOTE: Here, as in ReplicatedMergeTree, you can do ALTER which does not block the writing of data for a long time. 
+ /// Also block moves, because they can replace part with old state + auto merge_blocker = merger_mutator.merges_blocker.cancel(); + auto moves_blocked = parts_mover.moves_blocker.cancel(); + + + auto transactions = prepareAlterTransactions(new_columns, new_indices, context); + + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, storage_modifier); + + update_metadata(); + + for (auto & transaction : transactions) + { + transaction->commit(); + transaction.reset(); + } + + /// Columns sizes could be changed + recalculateColumnSizes(); + } } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index b3f69d463f2..ba4f14bd1fc 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3158,6 +3158,10 @@ void StorageReplicatedMergeTree::alter( const String current_database_name = getDatabaseName(); const String current_table_name = getTableName(); + /// We cannot check this alter commands with method isModifyingData() + /// because ReplicatedMergeTree stores both columns and metadata for + /// each replica. So we have to wait AlterThread even with lightweight + /// metadata alter. if (params.isSettingsAlter()) { /// We don't replicate storage_settings_ptr ALTER. It's local operation. From 05b933b1d3fa841acab9157537b913264ebab400 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 24 Dec 2019 21:07:51 +0300 Subject: [PATCH 2/9] Better --- dbms/src/Storages/AlterCommands.cpp | 22 +++--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 70 +++++++++---------- dbms/src/Storages/StorageMergeTree.cpp | 3 - 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index b24863a2afb..535105e5264 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -257,10 +257,16 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri if (ttl) column.ttl = ttl; - column.type = data_type; + if (data_type) + column.type = data_type; - column.default_desc.kind = default_kind; - column.default_desc.expression = default_expression; + /// User specified default expression or changed + /// datatype. We have to replace default. 
+ if (default_expression || data_type) + { + column.default_desc.kind = default_kind; + column.default_desc.expression = default_expression; + } }); } else if (type == MODIFY_ORDER_BY) @@ -389,13 +395,13 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri bool AlterCommand::isModifyingData() const { - /// Change binary representation on disk + /// Possible change data representation on disk if (type == MODIFY_COLUMN) - return data_type.get() || default_expression; + return data_type != nullptr; - return type == ADD_COLUMN /// We need to change columns.txt in each part - || type == DROP_COLUMN /// We need to change columns.txt in each part - || type == DROP_INDEX; /// We need to remove file from filesystem + return type == ADD_COLUMN /// We need to change columns.txt in each part for MergeTree + || type == DROP_COLUMN /// We need to change columns.txt in each part for MergeTree + || type == DROP_INDEX; /// We need to remove file from filesystem for MergeTree } bool AlterCommand::isSettingsAlter() const diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 8d892fd69d6..c89af45f2fa 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1342,11 +1342,11 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS); /// Set of columns that shouldn't be altered. - NameSet columns_alter_forbidden; + NameSet columns_alter_type_forbidden; /// Primary key columns can be ALTERed only if they are used in the key as-is /// (and not as a part of some expression) and if the ALTER only affects column metadata. - NameSet columns_alter_metadata_only; + NameSet columns_alter_type_metadata_only; if (partition_key_expr) { @@ -1354,13 +1354,13 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c /// TODO: in some cases (e.g. adding an Enum value) a partition key column can still be ALTERed. /// We should allow it. for (const String & col : partition_key_expr->getRequiredColumns()) - columns_alter_forbidden.insert(col); + columns_alter_type_forbidden.insert(col); } for (const auto & index : skip_indices) { for (const String & col : index->expr->getRequiredColumns()) - columns_alter_forbidden.insert(col); + columns_alter_type_forbidden.insert(col); } if (sorting_key_expr) @@ -1368,17 +1368,16 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c for (const ExpressionAction & action : sorting_key_expr->getActions()) { auto action_columns = action.getNeededColumns(); - columns_alter_forbidden.insert(action_columns.begin(), action_columns.end()); + columns_alter_type_forbidden.insert(action_columns.begin(), action_columns.end()); } for (const String & col : sorting_key_expr->getRequiredColumns()) - columns_alter_metadata_only.insert(col); + columns_alter_type_metadata_only.insert(col); /// We don't process sample_by_ast separately because it must be among the primary key columns /// and we don't process primary_key_expr separately because it is a prefix of sorting_key_expr. 
} - if (!merging_params.sign_column.empty()) - columns_alter_forbidden.insert(merging_params.sign_column); + columns_alter_type_forbidden.insert(merging_params.sign_column); std::map old_types; for (const auto & column : getColumns().getAllPhysical()) @@ -1386,34 +1385,26 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c for (const AlterCommand & command : commands) { - if (!command.isModifyingData()) + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { - continue; - } - - if (columns_alter_forbidden.count(command.column_name)) - throw Exception("Trying to ALTER key column " + command.column_name, ErrorCodes::ILLEGAL_COLUMN); - - if (columns_alter_metadata_only.count(command.column_name)) - { - if (command.type == AlterCommand::MODIFY_COLUMN) - { - auto it = old_types.find(command.column_name); - if (it != old_types.end() && isMetadataOnlyConversion(it->second, command.data_type.get())) - continue; - } - throw Exception( - "ALTER of key column " + command.column_name + " must be metadata-only", - ErrorCodes::ILLEGAL_COLUMN); + "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax", + ErrorCodes::BAD_ARGUMENTS); } - - if (command.type == AlterCommand::MODIFY_ORDER_BY) + else if (command.isModifyingData()) { - if (!is_custom_partitioned) - throw Exception( - "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + if (columns_alter_type_forbidden.count(command.column_name)) + throw Exception("Trying to ALTER key column " + command.column_name, ErrorCodes::ILLEGAL_COLUMN); + + if (columns_alter_type_metadata_only.count(command.column_name)) + { + if (command.type == AlterCommand::MODIFY_COLUMN) + { + auto it = old_types.find(command.column_name); + if (it == old_types.end() || !isMetadataOnlyConversion(it->second, command.data_type.get())) + throw Exception("ALTER of key column " + command.column_name + " must be metadata-only", ErrorCodes::ILLEGAL_COLUMN); + } + } } } @@ -1425,12 +1416,15 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c for (const auto & setting : new_changes) checkSettingCanBeChanged(setting.name); - /// Check that type conversions are possible. - ExpressionActionsPtr unused_expression; - NameToNameMap unused_map; - bool unused_bool; - createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), - getIndices().indices, new_indices.indices, unused_expression, unused_map, unused_bool); + if (commands.isModifyingData()) + { + /// Check that type conversions are possible. 
+ ExpressionActionsPtr unused_expression; + NameToNameMap unused_map; + bool unused_bool; + createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), + getIndices().indices, new_indices.indices, unused_expression, unused_map, unused_bool); + } } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index e6a847ba1bd..66d22da5c3b 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -299,9 +299,6 @@ void StorageMergeTree::alter( { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); - params.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes); - - IDatabase::ASTModifier settings_modifier = getSettingsModifier(new_changes); context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, new_constraints, storage_modifier); update_metadata(); From 34d2afa3549f74d672d72303ec8b3017e1e816a8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 24 Dec 2019 23:03:33 +0300 Subject: [PATCH 3/9] Fix obvious bug --- dbms/src/Storages/AlterCommands.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 535105e5264..217f7787d75 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -225,7 +225,9 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri column.default_desc.kind = default_kind; column.default_desc.expression = default_expression; } - column.comment = *comment; + if (comment) + column.comment = *comment; + column.codec = codec; column.ttl = ttl; From 80f42a1f3b4ee20dc8d573c7fc671df8003015a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 Dec 2019 23:07:44 +0300 Subject: [PATCH 4/9] Show physical addresses in StackTrace --- dbms/src/Common/Dwarf.h | 4 ++-- dbms/src/Common/StackTrace.cpp | 17 ++++++++++------- dbms/src/Common/SymbolIndex.h | 1 + dbms/src/Common/tests/symbol_index.cpp | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dbms/src/Common/Dwarf.h b/dbms/src/Common/Dwarf.h index 2f97212d4d7..9abb526a210 100644 --- a/dbms/src/Common/Dwarf.h +++ b/dbms/src/Common/Dwarf.h @@ -127,8 +127,8 @@ public: uint64_t line = 0; }; - /** - * Find the file and line number information corresponding to address. + /** Find the file and line number information corresponding to address. + * The address must be physical - offset in object file without offset in virtual memory where the object is loaded. */ bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const; diff --git a/dbms/src/Common/StackTrace.cpp b/dbms/src/Common/StackTrace.cpp index 2f3c4e9c2fa..597ed2028fa 100644 --- a/dbms/src/Common/StackTrace.cpp +++ b/dbms/src/Common/StackTrace.cpp @@ -258,10 +258,14 @@ static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offs for (size_t i = offset; i < size; ++i) { - const void * addr = frames[i]; + const void * virtual_addr = frames[i]; + auto object = symbol_index.findObject(virtual_addr); + uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0; + const void * physical_addr = reinterpret_cast(uintptr_t(virtual_addr) - virtual_offset); - out << i << ". 
" << addr << " "; - auto symbol = symbol_index.findSymbol(addr); + out << i << ". " << physical_addr << " "; + + auto symbol = symbol_index.findSymbol(virtual_addr); if (symbol) { int status = 0; @@ -272,18 +276,17 @@ static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offs out << " "; - if (auto object = symbol_index.findObject(addr)) + if (object) { if (std::filesystem::exists(object->name)) { auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first; DB::Dwarf::LocationInfo location; - if (dwarf_it->second.findAddress(uintptr_t(addr) - uintptr_t(object->address_begin), location, DB::Dwarf::LocationInfoMode::FAST)) + if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) out << location.file.toString() << ":" << location.line; - else - out << object->name; } + out << " in " << object->name; } else out << "?"; diff --git a/dbms/src/Common/SymbolIndex.h b/dbms/src/Common/SymbolIndex.h index 0e249c59bb2..1e762780dad 100644 --- a/dbms/src/Common/SymbolIndex.h +++ b/dbms/src/Common/SymbolIndex.h @@ -38,6 +38,7 @@ public: std::unique_ptr elf; }; + /// Address in virtual memory should be passed. These addresses include offset where the object is loaded in memory. const Symbol * findSymbol(const void * address) const; const Object * findObject(const void * address) const; diff --git a/dbms/src/Common/tests/symbol_index.cpp b/dbms/src/Common/tests/symbol_index.cpp index d1867cb524e..1c7e0ffc27d 100644 --- a/dbms/src/Common/tests/symbol_index.cpp +++ b/dbms/src/Common/tests/symbol_index.cpp @@ -49,7 +49,7 @@ int main(int argc, char ** argv) Dwarf dwarf(*object->elf); Dwarf::LocationInfo location; - if (dwarf.findAddress(uintptr_t(address), location, Dwarf::LocationInfoMode::FAST)) + if (dwarf.findAddress(uintptr_t(address) - uintptr_t(info.dli_fbase), location, Dwarf::LocationInfoMode::FAST)) std::cerr << location.file.toString() << ":" << location.line << "\n"; else std::cerr << "Dwarf: Not found\n"; From 6e785ea4bd0fbb4cf0ddc471bb49662a01bddbbc Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 25 Dec 2019 13:42:00 +0300 Subject: [PATCH 5/9] DOCS-173: clickhouse benchmark (#7947) * CLICKHOUSEDOCS-173: Draft of a description. * CLICKHOUSEDOCS-173: Table experiment. * CLICKHOUSEDOCS-173: Further writing. * CLICKHOUSEDOCS-173: Final. * CLICKHOUSEDOCS-173: Updated by comments. * CLICKHOUSEDOCS-173: Updated by comments. * CLICKHOUSEDOCS-173: Supported other languages. 
--- .../operations/utils/clickhouse-benchmark.md | 146 ++++++++++++++++++ docs/en/operations/utils/index.md | 6 +- .../operations/utils/clickhouse-benchmark.md | 1 + .../operations/utils/clickhouse-benchmark.md | 1 + docs/toc_en.yml | 1 + docs/toc_fa.yml | 3 +- docs/toc_ja.yml | 3 +- docs/toc_zh.yml | 3 +- .../operations/utils/clickhouse-benchmark.md | 1 + docs/zh/operations/utils/index.md | 6 +- 10 files changed, 162 insertions(+), 9 deletions(-) create mode 100644 docs/en/operations/utils/clickhouse-benchmark.md create mode 120000 docs/fa/operations/utils/clickhouse-benchmark.md create mode 120000 docs/ja/operations/utils/clickhouse-benchmark.md create mode 120000 docs/zh/operations/utils/clickhouse-benchmark.md diff --git a/docs/en/operations/utils/clickhouse-benchmark.md b/docs/en/operations/utils/clickhouse-benchmark.md new file mode 100644 index 00000000000..5707158e671 --- /dev/null +++ b/docs/en/operations/utils/clickhouse-benchmark.md @@ -0,0 +1,146 @@ +# clickhouse-benchmark + +Connects to a ClickHouse server and repeatedly sends specified queries. + +Syntax: + +```bash +$ echo "single query" | clickhouse-benchmark [keys] +``` +or +```bash +$ clickhouse-benchmark [keys] <<< "single query" +``` + +If you want to send a set of queries, create a text file and place each query on the individual string in this file. For example: + +```sql +SELECT * FROM system.numbers LIMIT 10000000 +SELECT 1 +``` + +Then pass this file to a standard input of `clickhouse-benchmark`. + +```bash +clickhouse-benchmark [keys] < queries_file +``` + +## Keys {#clickhouse-benchmark-keys} + +- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. +- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. +- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. +- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. +- `-i N`, `--iterations=N` — Total number of queries. Default value: 0. +- `-r`, `--randomize` — Random order of queries execution if there is more then one input query. +- `-s`, `--secure` — Using TLS connection. +- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). +- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) test to determine whether the two distributions aren't different with the selected level of confidence. +- `--cumulative` — Printing cumulative data instead of data per interval. +- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. +- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. +- `--user=USERNAME` — ClickHouse user name. Default value: `default`. +- `--password=PSWD` — ClickHouse user password. Default value: empty string. +- `--stacktrace` — Stack traces output. 
When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. +- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. +- `--help` — Shows the help message. + +If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. + +## Output {#clickhouse-benchmark-output} + +By default, `clickhouse-benchmark` reports for each `--delay` interval. + +Example of the report: + +```text +Queries executed: 10. + +localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675. + +0.000% 0.145 sec. +10.000% 0.146 sec. +20.000% 0.146 sec. +30.000% 0.146 sec. +40.000% 0.147 sec. +50.000% 0.148 sec. +60.000% 0.148 sec. +70.000% 0.148 sec. +80.000% 0.149 sec. +90.000% 0.150 sec. +95.000% 0.150 sec. +99.000% 0.150 sec. +99.900% 0.150 sec. +99.990% 0.150 sec. +``` + +In the report you can find: + +- Number of queries in the `Queries executed: ` field. +- Status string containing (in order): + + - Endpoint of ClickHouse server. + - Number of processed queries. + - QPS: QPS: How many queries server performed per second during a period specified in the `--delay` argument. + - RPS: How many rows server read per second during a period specified in the `--delay` argument. + - MiB/s: How many mebibytes server read per second during a period specified in the `--delay` argument. + - result RPS: How many rows placed by server to the result of a query per second during a period specified in the `--delay` argument. + - result MiB/s. How many mebibytes placed by server to the result of a query per second during a period specified in the `--delay` argument. + +- Percentiles of queries execution time. + + +## Comparison mode {#clickhouse-benchmark-comparison-mode} + +`clickhouse-benchmark` can compare performances for two running ClickHouse servers. + +To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately. + +## Example {#clickhouse-benchmark-example} + +```bash +$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 +``` +```text +Loaded 1 queries. + +Queries executed: 6. + +localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. + +0.000% 0.159 sec. +10.000% 0.159 sec. +20.000% 0.159 sec. +30.000% 0.160 sec. +40.000% 0.160 sec. +50.000% 0.162 sec. +60.000% 0.164 sec. +70.000% 0.165 sec. +80.000% 0.166 sec. +90.000% 0.166 sec. +95.000% 0.167 sec. +99.000% 0.167 sec. +99.900% 0.167 sec. +99.990% 0.167 sec. + + + +Queries executed: 10. + +localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. + +0.000% 0.159 sec. +10.000% 0.159 sec. +20.000% 0.160 sec. +30.000% 0.163 sec. +40.000% 0.164 sec. +50.000% 0.165 sec. +60.000% 0.166 sec. +70.000% 0.166 sec. +80.000% 0.167 sec. +90.000% 0.167 sec. +95.000% 0.170 sec. +99.000% 0.172 sec. 
+99.900% 0.172 sec. +99.990% 0.172 sec. +``` diff --git a/docs/en/operations/utils/index.md b/docs/en/operations/utils/index.md index ca0f0954150..3fcd66da8d8 100644 --- a/docs/en/operations/utils/index.md +++ b/docs/en/operations/utils/index.md @@ -1,7 +1,7 @@ # ClickHouse Utility -* [clickhouse-local](clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` does this. -* [clickhouse-copier](clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. - +- [clickhouse-local](clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` does this. +- [clickhouse-copier](clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. +- [clickhouse-benchmark](clickhouse-benchmark.md) — Loads server with the custom queries and settings. [Original article](https://clickhouse.yandex/docs/en/operations/utils/) diff --git a/docs/fa/operations/utils/clickhouse-benchmark.md b/docs/fa/operations/utils/clickhouse-benchmark.md new file mode 120000 index 00000000000..133b4d2e511 --- /dev/null +++ b/docs/fa/operations/utils/clickhouse-benchmark.md @@ -0,0 +1 @@ +../../../en/operations/utils/clickhouse-benchmark.md \ No newline at end of file diff --git a/docs/ja/operations/utils/clickhouse-benchmark.md b/docs/ja/operations/utils/clickhouse-benchmark.md new file mode 120000 index 00000000000..133b4d2e511 --- /dev/null +++ b/docs/ja/operations/utils/clickhouse-benchmark.md @@ -0,0 +1 @@ +../../../en/operations/utils/clickhouse-benchmark.md \ No newline at end of file diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 8a2b32b240a..a11c40e4907 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -212,6 +212,7 @@ nav: - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' + - 'clickhouse-benchmark': 'operations/utils/clickhouse-benchmark.md' - 'Development': - 'hidden': 'development/index.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index c5a2a7fd80b..710a2ee20f8 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -208,7 +208,8 @@ nav: - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - + - 'clickhouse-benchmark': 'operations/utils/clickhouse-benchmark.md' + - 'F.A.Q.': - 'General Questions': 'faq/general.md' diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index 8a2b32b240a..945042f0fef 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -212,7 +212,8 @@ nav: - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - + - 'clickhouse-benchmark': 'operations/utils/clickhouse-benchmark.md' + - 'Development': - 'hidden': 'development/index.md' - 'Overview of ClickHouse Architecture': 'development/architecture.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 7395dcfe145..09f9875069b 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -207,7 +207,8 @@ nav: - '介绍': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - + - 'clickhouse-benchmark': 'operations/utils/clickhouse-benchmark.md' + - '常见问题': - '一般的问题': 'faq/general.md' diff --git a/docs/zh/operations/utils/clickhouse-benchmark.md 
b/docs/zh/operations/utils/clickhouse-benchmark.md new file mode 120000 index 00000000000..133b4d2e511 --- /dev/null +++ b/docs/zh/operations/utils/clickhouse-benchmark.md @@ -0,0 +1 @@ +../../../en/operations/utils/clickhouse-benchmark.md \ No newline at end of file diff --git a/docs/zh/operations/utils/index.md b/docs/zh/operations/utils/index.md index ca0f0954150..3fcd66da8d8 100644 --- a/docs/zh/operations/utils/index.md +++ b/docs/zh/operations/utils/index.md @@ -1,7 +1,7 @@ # ClickHouse Utility -* [clickhouse-local](clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` does this. -* [clickhouse-copier](clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. - +- [clickhouse-local](clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` does this. +- [clickhouse-copier](clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. +- [clickhouse-benchmark](clickhouse-benchmark.md) — Loads server with the custom queries and settings. [Original article](https://clickhouse.yandex/docs/en/operations/utils/) From 3b6e3db0c285e6d89adac213b6a207723886a112 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 25 Dec 2019 15:34:04 +0300 Subject: [PATCH 6/9] Remove redundant alter_lock, because parts cannot be altered concurrently. --- dbms/src/Interpreters/MutationsInterpreter.cpp | 2 ++ dbms/src/Interpreters/MutationsInterpreter.h | 7 ++++--- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +--- dbms/src/Storages/MergeTree/MergeTreeDataPart.h | 10 ---------- dbms/src/Storages/StorageMergeTree.cpp | 1 - 5 files changed, 7 insertions(+), 17 deletions(-) diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index 31470b08468..8ff10e92dee 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -339,6 +339,8 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) affected_materialized.emplace(mat_column); } + /// Just to be sure, that we don't change type + /// after update expression execution. const auto & update_expr = kv.second; auto updated_column = makeASTFunction("CAST", makeASTFunction("if", diff --git a/dbms/src/Interpreters/MutationsInterpreter.h b/dbms/src/Interpreters/MutationsInterpreter.h index a5c1df778cb..4aac3182205 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.h +++ b/dbms/src/Interpreters/MutationsInterpreter.h @@ -65,8 +65,9 @@ private: /// Each stage has output_columns that contain columns that are changed at the end of that stage /// plus columns needed for the next mutations. /// - /// First stage is special: it can contain only DELETEs and is executed using InterpreterSelectQuery - /// to take advantage of table indexes (if there are any). + /// First stage is special: it can contain only filters and is executed using InterpreterSelectQuery + /// to take advantage of table indexes (if there are any). It's necessary because all mutations have + /// `WHERE clause` part. struct Stage { @@ -83,7 +84,7 @@ private: /// A chain of actions needed to execute this stage. /// First steps calculate filter columns for DELETEs (in the same order as in `filter_column_names`), - /// then there is (possibly) an UPDATE stage, and finally a projection stage. + /// then there is (possibly) an UPDATE step, and finally a projection step. 
ExpressionActionsChain expressions_chain; Names filter_column_names; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 248be299fa3..affdade1c9b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -234,7 +234,7 @@ public: const NamesAndTypesList & getNewColumns() const { return new_columns; } const DataPart::Checksums & getNewChecksums() const { return new_checksums; } - AlterDataPartTransaction(DataPartPtr data_part_) : data_part(data_part_), alter_lock(data_part->alter_mutex) {} + AlterDataPartTransaction(DataPartPtr data_part_) : data_part(data_part_) {} const DataPartPtr & getDataPart() const { return data_part; } bool isValid() const; @@ -244,9 +244,7 @@ public: bool valid = true; - //don't interchange order of data_part & alter_lock DataPartPtr data_part; - DataPartsLock alter_lock; DataPart::Checksums new_checksums; NamesAndTypesList new_columns; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index d47411c9068..cf62b84f7ba 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -227,16 +227,6 @@ struct MergeTreeDataPart */ mutable std::shared_mutex columns_lock; - /** It is taken for the whole time ALTER a part: from the beginning of the recording of the temporary files to their renaming to permanent. - * It is taken with unlocked `columns_lock`. - * - * NOTE: "You can" do without this mutex if you could turn ReadRWLock into WriteRWLock without removing the lock. - * This transformation is impossible, because it would create a deadlock, if you do it from two threads at once. - * Taking this mutex means that we want to lock columns_lock on read with intention then, not - * unblocking, block it for writing. - */ - mutable std::mutex alter_mutex; - MergeTreeIndexGranularityInfo index_granularity_info; ~MergeTreeDataPart(); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 9d284800712..f1df178e810 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -454,7 +454,6 @@ void StorageMergeTree::mutate(const MutationCommands & commands, const Context & auto check = [version, this]() { return isMutationDone(version); }; std::unique_lock lock(mutation_wait_mutex); mutation_wait_event.wait(lock, check); - } } From 6701adceb702fc8b776730c2a60a47ae9f9ab6ef Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 25 Dec 2019 16:42:40 +0300 Subject: [PATCH 7/9] DOCS-139: FINAL. EN review, RU translation. (#8391) * Update select.md (#78) * CLICKHOUSEDOCS-85: RU translation. 
* Update docs/en/query_language/select.md Co-Authored-By: Ivan Blinkov * Update docs/en/query_language/select.md Co-Authored-By: Ivan Blinkov * Update docs/ru/query_language/select.md Co-Authored-By: Ivan Blinkov Co-authored-by: FeehanG <51821376+FeehanG@users.noreply.github.com> Co-authored-by: Ivan Blinkov --- docs/en/query_language/select.md | 12 ++++++------ docs/ru/query_language/select.md | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index cfa3a1e4dc7..e6e7676c643 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -114,18 +114,18 @@ If a query does not list any columns (for example, `SELECT count() FROM t`), som #### FINAL Modifier {#select-from-final} -Appliable when selecting data from tables of the [MergeTree](../operations/table_engines/mergetree.md)-engine family, except `GraphiteMergeTree`. When `FINAL` is specified, ClickHouse fully merges data before returning the result and thus performs all data transformations that are supposed to happen during merges for given table engine. +Applicable when selecting data from tables from the [MergeTree](../operations/table_engines/mergetree.md)-engine family other than `GraphiteMergeTree`. When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. Also supported for: - [Replicated](../operations/table_engines/replication.md) versions of `MergeTree` engines. -- [View](../operations/table_engines/view.md), [Buffer](../operations/table_engines/buffer.md), [Distributed](../operations/table_engines/distributed.md), [MaterializedView](../operations/table_engines/materializedview.md) engines that operate over other engines, if they created over `MergeTree`-engine tables. +- [View](../operations/table_engines/view.md), [Buffer](../operations/table_engines/buffer.md), [Distributed](../operations/table_engines/distributed.md), and [MaterializedView](../operations/table_engines/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. -The queries that use `FINAL` are executed slower than similar queries that don't, because: +Queries that use `FINAL` are executed not as fast as similar queries that don't, because: -- Query is executed in a single thread, and data is merged during query execution. -- Queries with `FINAL` read primary key columns additionally to the columns specified in the query. +- Query is executed in a single thread and data is merged during query execution. +- Queries with `FINAL` read primary key columns in addition to the columns specified in the query. -In the most cases, avoid using `FINAL`. +In most cases, avoid using `FINAL`. ### SAMPLE Clause {#select-sample-clause} diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index 56549f21e53..6c652557858 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -114,6 +114,23 @@ Cекция `FROM` определяет источник данных: Модификатор `FINAL` может быть использован в запросе `SELECT` из таблиц семейства [MergeTree](../operations/table_engines/mergetree.md). При указании `FINAL`, данные будут выбираться полностью "домерженными". Стоит учитывать, что использование `FINAL` приводит к чтению также столбцов, относящихся к первичному ключу. 
Также, запрос будет выполняться в один поток, и при выполнении запроса будет выполняться слияние данных. Это приводит к тому, что при использовании `FINAL`, запрос выполняется медленнее. В большинстве случаев, следует избегать использования `FINAL`. Модификатор `FINAL` может быть использован для всех таблиц семейства `MergeTree`, которые производят преобразования данных в процессе фоновых слияний (кроме GraphiteMergeTree). +#### FINAL Modifier {#select-from-final} + +Применим при выборке данных из таблиц с движками таблиц семейства [MergeTree](../operations/table_engines/mergetree.md), кроме `GraphiteMergeTree`. Если в запросе используется `FINAL`, то ClickHouse полностью мёржит данные перед выдачей результата, таким образом выполняя все преобразования данных, которые производятся движком таблиц при мёржах. + +Также поддержан для движков: + +- [Replicated](../operations/table_engines/replication.md)-версий `MergeTree`. +- [View](../operations/table_engines/view.md), [Buffer](../operations/table_engines/buffer.md), [Distributed](../operations/table_engines/distributed.md), и [MaterializedView](../operations/table_engines/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`. + +Запросы, использующие `FINAL` исполняются медленнее аналогичных запросов без `FINAL`, поскольку: + +- Запрос исполняется в один поток и данные мёржатся в процессе выполнения. +- Запросы с модификатором `FINAL` дополнительно к столбцам, указанным в запросе, читают столбцы первичного ключа. + +По возможности не используйте модификатор `FINAL`. + + ### Секция SAMPLE {#select-sample-clause} Секция `SAMPLE` позволяет выполнять запросы приближённо. Например, чтобы посчитать статистику по всем визитам, можно обработать 1/10 всех визитов и результат домножить на 10. From 17f175469542cdba68b9725c8234a23b9dd9986d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 25 Dec 2019 19:08:44 +0300 Subject: [PATCH 8/9] Publish different files for perftest. --- docker/packager/binary/build.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 7d6b7a518d4..51ce99a9af1 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -21,6 +21,14 @@ mv ./dbms/unit_tests_dbms /output find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so.*' -print -exec mv '{}' /output \; +# Different files for performance test. +if [ "performance" == "$COMBINED_OUTPUT" ] +then + cp -r ../dbms/tests/performance /output + rm /output/unit_tests_dbms ||: + rm /output/clickhouse-odbc-bridge ||: +fi + # May be set for split build or for performance test. if [ "" != "$COMBINED_OUTPUT" ] then From 438ea5bc22efda6e5218fc5ad35af0360c1e7dd1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 25 Dec 2019 22:17:41 +0300 Subject: [PATCH 9/9] Remove context from formats (#8388) * Remove Context from formats. 
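The idea behind this patch is that formats no longer hold a `Context`: `FormatFactory` resolves everything relevant from the user settings and the context into a plain `FormatSettings` value once, and hands that value to the format it constructs. A minimal stand-alone sketch of that pattern, using hypothetical class names rather than the actual ClickHouse types, could look like this:

```cpp
// Minimal sketch of the pattern this commit moves towards (hypothetical names,
// not the actual ClickHouse classes): the factory resolves user settings into a
// plain FormatSettings value once, and the format stores that value instead of
// keeping a reference to a Context.

struct FormatSettings            // stand-in for DB::FormatSettings (assumption)
{
    char csv_delimiter = ',';
    bool write_statistics = true;
};

class ExampleRowOutputFormat
{
public:
    explicit ExampleRowOutputFormat(const FormatSettings & settings_)
        : settings(settings_)    // copied by value: no Context, no shared state
    {
    }

    char delimiter() const { return settings.csv_delimiter; }

private:
    FormatSettings settings;
};

int main()
{
    FormatSettings settings;     // in ClickHouse this would be filled from Settings + Context
    settings.csv_delimiter = '\t';

    ExampleRowOutputFormat format(settings);
    return format.delimiter() == '\t' ? 0 : 1;
}
```
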
--- dbms/CMakeLists.txt | 4 +- .../ParallelParsingBlockInputStream.cpp | 2 +- .../ParallelParsingBlockInputStream.h | 12 ++-- dbms/src/Formats/FormatFactory.cpp | 62 ++++++++++++++----- dbms/src/Formats/FormatFactory.h | 4 -- dbms/src/Formats/FormatSchemaInfo.cpp | 14 ++--- dbms/src/Formats/FormatSchemaInfo.h | 2 +- dbms/src/Formats/FormatSettings.h | 21 +++++++ dbms/src/Formats/NativeFormat.cpp | 2 - dbms/src/Formats/NullFormat.cpp | 1 - .../Formats/ParsedTemplateFormatString.cpp | 18 +++--- dbms/src/Formats/ParsedTemplateFormatString.h | 5 +- dbms/src/Functions/FunctionsConversion.cpp | 23 ------- dbms/src/Functions/FunctionsConversion.h | 48 +++++++++----- dbms/src/Interpreters/castColumn.cpp | 12 +++- dbms/src/Interpreters/castColumn.h | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 4 +- .../Formats/Impl/ArrowColumnToCHColumn.h | 2 +- .../Formats/Impl/BinaryRowInputFormat.cpp | 2 - .../Formats/Impl/BinaryRowOutputFormat.cpp | 2 - .../Formats/Impl/CSVRowInputFormat.cpp | 1 - .../Formats/Impl/CSVRowOutputFormat.cpp | 1 - .../Formats/Impl/CapnProtoRowInputFormat.cpp | 5 +- .../Impl/JSONCompactEachRowRowInputFormat.cpp | 2 - .../JSONCompactEachRowRowOutputFormat.cpp | 2 - .../Impl/JSONCompactRowOutputFormat.cpp | 1 - .../Impl/JSONEachRowRowInputFormat.cpp | 1 - .../Impl/JSONEachRowRowOutputFormat.cpp | 1 - ...JSONEachRowWithProgressRowOutputFormat.cpp | 1 - .../Formats/Impl/JSONRowOutputFormat.cpp | 1 - .../Formats/Impl/MySQLOutputFormat.cpp | 30 ++++----- .../Formats/Impl/MySQLOutputFormat.h | 15 +++-- .../Processors/Formats/Impl/NativeFormat.cpp | 2 - .../Processors/Formats/Impl/NullFormat.cpp | 1 - .../Impl/ODBCDriver2BlockOutputFormat.cpp | 2 +- .../Impl/ODBCDriverBlockOutputFormat.cpp | 1 - .../Formats/Impl/ORCBlockInputFormat.cpp | 9 ++- .../Formats/Impl/ORCBlockInputFormat.h | 4 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 10 +-- .../Formats/Impl/ParquetBlockInputFormat.h | 4 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 1 - .../Formats/Impl/PrettyBlockOutputFormat.cpp | 2 - .../Impl/PrettyCompactBlockOutputFormat.cpp | 3 - .../Impl/PrettySpaceBlockOutputFormat.cpp | 2 - .../Formats/Impl/ProtobufRowInputFormat.cpp | 6 +- .../Formats/Impl/ProtobufRowOutputFormat.cpp | 8 +-- .../Formats/Impl/TSKVRowInputFormat.cpp | 1 - .../Formats/Impl/TSKVRowOutputFormat.cpp | 1 - .../Impl/TabSeparatedRowInputFormat.cpp | 3 - .../Impl/TabSeparatedRowOutputFormat.cpp | 4 -- .../Impl/TemplateBlockOutputFormat.cpp | 26 ++++---- .../Formats/Impl/TemplateBlockOutputFormat.h | 5 +- .../Formats/Impl/TemplateRowInputFormat.cpp | 39 ++++++------ .../Formats/Impl/TemplateRowInputFormat.h | 7 ++- .../Formats/Impl/ValuesBlockInputFormat.cpp | 7 +-- .../Formats/Impl/ValuesBlockInputFormat.h | 7 ++- .../Formats/Impl/ValuesRowOutputFormat.cpp | 1 - .../Formats/Impl/VerticalRowOutputFormat.cpp | 1 - .../Formats/Impl/XMLRowOutputFormat.cpp | 1 - 59 files changed, 234 insertions(+), 227 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 45d12f8ed93..466b3daf94f 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -186,8 +186,8 @@ endif() list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) -list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/FunctionHelpers.cpp) -list (APPEND dbms_headers src/Functions/IFunctionImpl.h src/Functions/FunctionFactory.h src/Functions/FunctionHelpers.h) +list (APPEND dbms_sources src/Functions/IFunction.cpp 
src/Functions/FunctionFactory.cpp src/Functions/FunctionHelpers.cpp src/Functions/extractTimeZoneFromFunctionArguments.cpp) +list (APPEND dbms_headers src/Functions/IFunctionImpl.h src/Functions/FunctionFactory.h src/Functions/FunctionHelpers.h src/Functions/extractTimeZoneFromFunctionArguments.h) list (APPEND dbms_sources src/AggregateFunctions/AggregateFunctionFactory.cpp diff --git a/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp index c894af82580..3f6ddbd7a15 100644 --- a/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp @@ -65,7 +65,7 @@ void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_unit_n */ ReadBuffer read_buffer(unit.segment.data(), unit.segment.size(), 0); auto parser = std::make_unique( - input_processor_creator(read_buffer, header, context, + input_processor_creator(read_buffer, header, row_input_format_params, format_settings)); unit.block_ext.block.clear(); diff --git a/dbms/src/DataStreams/ParallelParsingBlockInputStream.h b/dbms/src/DataStreams/ParallelParsingBlockInputStream.h index 8c276f2f7dd..89a9d7c8926 100644 --- a/dbms/src/DataStreams/ParallelParsingBlockInputStream.h +++ b/dbms/src/DataStreams/ParallelParsingBlockInputStream.h @@ -55,23 +55,21 @@ private: using InputProcessorCreator = std::function; public: struct InputCreatorParams { - const Block &sample; - const Context &context; - const RowInputFormatParams& row_input_format_params; + const Block & sample; + const RowInputFormatParams & row_input_format_params; const FormatSettings &settings; }; struct Params { ReadBuffer & read_buffer; - const InputProcessorCreator &input_processor_creator; - const InputCreatorParams &input_creator_params; + const InputProcessorCreator & input_processor_creator; + const InputCreatorParams & input_creator_params; FormatFactory::FileSegmentationEngine file_segmentation_engine; int max_threads; size_t min_chunk_bytes; @@ -79,7 +77,6 @@ public: explicit ParallelParsingBlockInputStream(const Params & params) : header(params.input_creator_params.sample), - context(params.input_creator_params.context), row_input_format_params(params.input_creator_params.row_input_format_params), format_settings(params.input_creator_params.settings), input_processor_creator(params.input_processor_creator), @@ -149,7 +146,6 @@ protected: private: const Block header; - const Context context; const RowInputFormatParams row_input_format_params; const FormatSettings format_settings; const InputProcessorCreator input_processor_creator; diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index aa65fe5765f..240e591123f 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include namespace DB @@ -34,7 +36,7 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & name) } -static FormatSettings getInputFormatSetting(const Settings & settings) +static FormatSettings getInputFormatSetting(const Settings & settings, const Context & context) { FormatSettings format_settings; format_settings.csv.delimiter = settings.format_csv_delimiter; @@ -56,11 +58,21 @@ static FormatSettings getInputFormatSetting(const Settings & settings) format_settings.template_settings.row_format = settings.format_template_row; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; 
format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; + format_settings.schema.format_schema = settings.format_schema; + format_settings.schema.format_schema_path = context.getFormatSchemaPath(); + format_settings.schema.is_server = context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER); + format_settings.custom.result_before_delimiter = settings.format_custom_result_before_delimiter; + format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; + format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; + format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; + format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter; + format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter; + format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter; return format_settings; } -static FormatSettings getOutputFormatSetting(const Settings & settings) +static FormatSettings getOutputFormatSetting(const Settings & settings, const Context & context) { FormatSettings format_settings; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; @@ -77,6 +89,16 @@ static FormatSettings getOutputFormatSetting(const Settings & settings) format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.write_statistics = settings.output_format_write_statistics; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; + format_settings.schema.format_schema = settings.format_schema; + format_settings.schema.format_schema_path = context.getFormatSchemaPath(); + format_settings.schema.is_server = context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER); + format_settings.custom.result_before_delimiter = settings.format_custom_result_before_delimiter; + format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; + format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; + format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; + format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter; + format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter; + format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter; return format_settings; } @@ -100,9 +122,9 @@ BlockInputStreamPtr FormatFactory::getInput( throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); const Settings & settings = context.getSettingsRef(); - FormatSettings format_settings = getInputFormatSetting(settings); + FormatSettings format_settings = getInputFormatSetting(settings, context); - return input_getter(buf, sample, context, max_block_size, callback ? callback : ReadCallback(), format_settings); + return input_getter(buf, sample, max_block_size, callback ? 
callback : ReadCallback(), format_settings); } const Settings & settings = context.getSettingsRef(); @@ -118,7 +140,7 @@ BlockInputStreamPtr FormatFactory::getInput( if (!input_getter) throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); - FormatSettings format_settings = getInputFormatSetting(settings); + FormatSettings format_settings = getInputFormatSetting(settings, context); RowInputFormatParams row_input_format_params; row_input_format_params.max_block_size = max_block_size; @@ -128,7 +150,7 @@ BlockInputStreamPtr FormatFactory::getInput( row_input_format_params.max_execution_time = settings.max_execution_time; row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode; - auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, context, row_input_format_params, format_settings}; + auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, row_input_format_params, format_settings}; ParallelParsingBlockInputStream::Params params{buf, input_getter, input_creator_params, file_segmentation_engine, static_cast(settings.max_threads), @@ -164,16 +186,16 @@ BlockOutputStreamPtr FormatFactory::getOutput( throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); const Settings & settings = context.getSettingsRef(); - FormatSettings format_settings = getOutputFormatSetting(settings); + FormatSettings format_settings = getOutputFormatSetting(settings, context); /** Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. */ return std::make_shared( - output_getter(buf, sample, context, callback, format_settings), sample); + output_getter(buf, sample, std::move(callback), format_settings), sample); } - auto format = getOutputFormat(name, buf, sample, context, callback); + auto format = getOutputFormat(name, buf, sample, context, std::move(callback)); return std::make_shared(std::make_shared(format), sample); } @@ -191,7 +213,7 @@ InputFormatPtr FormatFactory::getInputFormat( throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); const Settings & settings = context.getSettingsRef(); - FormatSettings format_settings = getInputFormatSetting(settings); + FormatSettings format_settings = getInputFormatSetting(settings, context); RowInputFormatParams params; params.max_block_size = max_block_size; @@ -201,7 +223,13 @@ InputFormatPtr FormatFactory::getInputFormat( params.max_execution_time = settings.max_execution_time; params.timeout_overflow_mode = settings.timeout_overflow_mode; - return input_getter(buf, sample, context, params, format_settings); + auto format = input_getter(buf, sample, params, format_settings); + + /// It's a kludge. Because I cannot remove context from values format. + if (auto * values = typeid_cast(format.get())) + values->setContext(context); + + return format; } @@ -213,12 +241,18 @@ OutputFormatPtr FormatFactory::getOutputFormat( throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); const Settings & settings = context.getSettingsRef(); - FormatSettings format_settings = getOutputFormatSetting(settings); + FormatSettings format_settings = getOutputFormatSetting(settings, context); /** TODO: Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. 
*/ - return output_getter(buf, sample, context, callback, format_settings); + auto format = output_getter(buf, sample, std::move(callback), format_settings); + + /// It's a kludge. Because I cannot remove context from MySQL format. + if (auto * mysql = typeid_cast(format.get())) + mysql->setContext(context); + + return format; } @@ -259,7 +293,7 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm auto & target = dict[name].file_segmentation_engine; if (target) throw Exception("FormatFactory: File segmentation engine " + name + " is already registered", ErrorCodes::LOGICAL_ERROR); - target = file_segmentation_engine; + target = std::move(file_segmentation_engine); } FormatFactory::FormatFactory() diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index ee2cf3ee444..cbf64afeaec 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -59,7 +59,6 @@ private: using InputCreator = std::function; @@ -67,21 +66,18 @@ private: using OutputCreator = std::function; using InputProcessorCreator = std::function; using OutputProcessorCreator = std::function; diff --git a/dbms/src/Formats/FormatSchemaInfo.cpp b/dbms/src/Formats/FormatSchemaInfo.cpp index fab8fc7fa63..707f9babe8d 100644 --- a/dbms/src/Formats/FormatSchemaInfo.cpp +++ b/dbms/src/Formats/FormatSchemaInfo.cpp @@ -26,7 +26,7 @@ namespace } -FormatSchemaInfo::FormatSchemaInfo(const Context & context, const String & format_schema, const String & format, bool require_message) +FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path) { if (format_schema.empty()) throw Exception( @@ -54,29 +54,25 @@ FormatSchemaInfo::FormatSchemaInfo(const Context & context, const String & forma else path.assign(format_schema).makeFile().getFileName(); - auto default_schema_directory = [&context]() + auto default_schema_directory = [&format_schema_path]() { - static const String str = Poco::Path(context.getFormatSchemaPath()).makeAbsolute().makeDirectory().toString(); + static const String str = Poco::Path(format_schema_path).makeAbsolute().makeDirectory().toString(); return str; }; - auto is_server = [&context]() - { - return context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER); - }; if (path.getExtension().empty() && !default_file_extension.empty()) path.setExtension(default_file_extension); if (path.isAbsolute()) { - if (is_server()) + if (is_server) throw Exception("Absolute path in the 'format_schema' setting is prohibited: " + path.toString(), ErrorCodes::BAD_ARGUMENTS); schema_path = path.getFileName(); schema_directory = path.makeParent().toString(); } else if (path.depth() >= 1 && path.directory(0) == "..") { - if (is_server()) + if (is_server) throw Exception( "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: " + path.toString(), ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/Formats/FormatSchemaInfo.h b/dbms/src/Formats/FormatSchemaInfo.h index 3360698c81f..7af0d56a0cf 100644 --- a/dbms/src/Formats/FormatSchemaInfo.h +++ b/dbms/src/Formats/FormatSchemaInfo.h @@ -10,7 +10,7 @@ class Context; class FormatSchemaInfo { public: - FormatSchemaInfo(const Context & context, const String & format_schema, const String & format, bool require_message); + FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool 
is_server, const std::string & format_schema_path); /// Returns path to the schema file. const String & schemaPath() const { return schema_path; } diff --git a/dbms/src/Formats/FormatSettings.h b/dbms/src/Formats/FormatSettings.h index dfd5d5b86f6..6219edf6e6d 100644 --- a/dbms/src/Formats/FormatSettings.h +++ b/dbms/src/Formats/FormatSettings.h @@ -89,6 +89,27 @@ struct FormatSettings UInt64 row_group_size = 1000000; } parquet; + struct Schema + { + std::string format_schema; + std::string format_schema_path; + bool is_server = false; + }; + + Schema schema; + + struct Custom + { + std::string result_before_delimiter; + std::string result_after_delimiter; + std::string row_before_delimiter; + std::string row_after_delimiter; + std::string row_between_delimiter; + std::string field_delimiter; + std::string escaping_rule; + }; + + Custom custom; }; } diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp index 11835c01123..f9cafbe5459 100644 --- a/dbms/src/Formats/NativeFormat.cpp +++ b/dbms/src/Formats/NativeFormat.cpp @@ -11,7 +11,6 @@ void registerInputFormatNative(FormatFactory & factory) factory.registerInputFormat("Native", []( ReadBuffer & buf, const Block & sample, - const Context &, UInt64 /* max_block_size */, FormatFactory::ReadCallback /* callback */, const FormatSettings &) @@ -25,7 +24,6 @@ void registerOutputFormatNative(FormatFactory & factory) factory.registerOutputFormat("Native", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings &) { diff --git a/dbms/src/Formats/NullFormat.cpp b/dbms/src/Formats/NullFormat.cpp index d5376b70fe1..c07723211eb 100644 --- a/dbms/src/Formats/NullFormat.cpp +++ b/dbms/src/Formats/NullFormat.cpp @@ -10,7 +10,6 @@ void registerOutputFormatNull(FormatFactory & factory) factory.registerOutputFormat("Null", []( WriteBuffer &, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings &) { diff --git a/dbms/src/Formats/ParsedTemplateFormatString.cpp b/dbms/src/Formats/ParsedTemplateFormatString.cpp index d6773970c0c..981d43089a2 100644 --- a/dbms/src/Formats/ParsedTemplateFormatString.cpp +++ b/dbms/src/Formats/ParsedTemplateFormatString.cpp @@ -234,36 +234,32 @@ void ParsedTemplateFormatString::throwInvalidFormat(const String & message, size ErrorCodes::INVALID_TEMPLATE_FORMAT); } -ParsedTemplateFormatString ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(const Context & context) +ParsedTemplateFormatString ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(const FormatSettings::Custom & settings) { - const Settings & settings = context.getSettingsRef(); - /// Set resultset format to "result_before_delimiter ${data} result_after_delimiter" ParsedTemplateFormatString resultset_format; - resultset_format.delimiters.emplace_back(settings.format_custom_result_before_delimiter); - resultset_format.delimiters.emplace_back(settings.format_custom_result_after_delimiter); + resultset_format.delimiters.emplace_back(settings.result_before_delimiter); + resultset_format.delimiters.emplace_back(settings.result_after_delimiter); resultset_format.formats.emplace_back(ParsedTemplateFormatString::ColumnFormat::None); resultset_format.format_idx_to_column_idx.emplace_back(0); resultset_format.column_names.emplace_back("data"); return resultset_format; } -ParsedTemplateFormatString ParsedTemplateFormatString::setupCustomSeparatedRowFormat(const Context & context, const Block & sample) +ParsedTemplateFormatString 
ParsedTemplateFormatString::setupCustomSeparatedRowFormat(const FormatSettings::Custom & settings, const Block & sample) { - const Settings & settings = context.getSettingsRef(); - /// Set row format to /// "row_before_delimiter ${Col0:escaping} field_delimiter ${Col1:escaping} field_delimiter ... ${ColN:escaping} row_after_delimiter" - ParsedTemplateFormatString::ColumnFormat escaping = ParsedTemplateFormatString::stringToFormat(settings.format_custom_escaping_rule); + ParsedTemplateFormatString::ColumnFormat escaping = ParsedTemplateFormatString::stringToFormat(settings.escaping_rule); ParsedTemplateFormatString row_format; - row_format.delimiters.emplace_back(settings.format_custom_row_before_delimiter); + row_format.delimiters.emplace_back(settings.row_before_delimiter); for (size_t i = 0; i < sample.columns(); ++i) { row_format.formats.emplace_back(escaping); row_format.format_idx_to_column_idx.emplace_back(i); row_format.column_names.emplace_back(sample.getByPosition(i).name); bool last_column = i == sample.columns() - 1; - row_format.delimiters.emplace_back(last_column ? settings.format_custom_row_after_delimiter : settings.format_custom_field_delimiter); + row_format.delimiters.emplace_back(last_column ? settings.row_after_delimiter : settings.field_delimiter); } return row_format; } diff --git a/dbms/src/Formats/ParsedTemplateFormatString.h b/dbms/src/Formats/ParsedTemplateFormatString.h index cb751d1412a..2da8a074679 100644 --- a/dbms/src/Formats/ParsedTemplateFormatString.h +++ b/dbms/src/Formats/ParsedTemplateFormatString.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -49,8 +50,8 @@ struct ParsedTemplateFormatString String dump() const; [[noreturn]] void throwInvalidFormat(const String & message, size_t column) const; - static ParsedTemplateFormatString setupCustomSeparatedResultsetFormat(const Context & context); - static ParsedTemplateFormatString setupCustomSeparatedRowFormat(const Context & context, const Block & sample); + static ParsedTemplateFormatString setupCustomSeparatedResultsetFormat(const FormatSettings::Custom & settings); + static ParsedTemplateFormatString setupCustomSeparatedRowFormat(const FormatSettings::Custom & settings, const Block & sample); }; } diff --git a/dbms/src/Functions/FunctionsConversion.cpp b/dbms/src/Functions/FunctionsConversion.cpp index 4e6e0fe6e29..1d6a24d99b4 100644 --- a/dbms/src/Functions/FunctionsConversion.cpp +++ b/dbms/src/Functions/FunctionsConversion.cpp @@ -5,29 +5,6 @@ namespace DB { -void throwExceptionForIncompletelyParsedValue( - ReadBuffer & read_buffer, Block & block, size_t result) -{ - const IDataType & to_type = *block.getByPosition(result).type; - - WriteBufferFromOwnString message_buf; - message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size()) - << " as " << to_type.getName() - << ": syntax error"; - - if (read_buffer.offset()) - message_buf << " at position " << read_buffer.offset() - << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")"; - else - message_buf << " at begin of string"; - - if (isNativeNumber(to_type)) - message_buf << ". 
Note: there are to" << to_type.getName() << "OrZero and to" << to_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception."; - - throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT); -} - - void registerFunctionsConversion(FunctionFactory & factory) { factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index f6d6f615f0c..1f0360b3f1b 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -501,7 +501,26 @@ inline bool tryParseImpl(DataTypeDateTime::FieldType & x, Read /** Throw exception with verbose message when string value is not parsed completely. */ -[[noreturn]] void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result); +[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result) +{ + const IDataType & to_type = *block.getByPosition(result).type; + + WriteBufferFromOwnString message_buf; + message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size()) + << " as " << to_type.getName() + << ": syntax error"; + + if (read_buffer.offset()) + message_buf << " at position " << read_buffer.offset() + << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")"; + else + message_buf << " at begin of string"; + + if (isNativeNumber(to_type)) + message_buf << ". Note: there are to" << to_type.getName() << "OrZero and to" << to_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception."; + + throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT); +} enum class ConvertFromStringExceptionMode @@ -886,6 +905,7 @@ public: static constexpr bool to_datetime64 = std::is_same_v; static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } String getName() const override { @@ -1083,6 +1103,7 @@ public: std::is_same_v>; static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } String getName() const override { @@ -1231,6 +1252,7 @@ class FunctionToFixedString : public IFunction public: static constexpr auto name = "toFixedString"; static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } String getName() const override { @@ -1686,9 +1708,9 @@ public: using WrapperType = std::function; using MonotonicityForRange = std::function; - FunctionCast(const Context & context_, const char * name_, MonotonicityForRange && monotonicity_for_range_ + FunctionCast(const char * name_, MonotonicityForRange && monotonicity_for_range_ , const DataTypes & argument_types_, const DataTypePtr & return_type_) - : context(context_), name(name_), monotonicity_for_range(monotonicity_for_range_) + : name(name_), monotonicity_for_range(monotonicity_for_range_) , argument_types(argument_types_), return_type(return_type_) { } @@ -1719,7 +1741,6 @@ public: private: - const Context & context; const char * name; MonotonicityForRange monotonicity_for_range; @@ -1735,10 +1756,10 @@ private: { /// In case when converting to Nullable type, we apply different parsing rule, /// that will not throw an exception but return NULL in case of malformed input. 
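/// Editorial sketch, not part of the patch: the conversion functions gain a context-free
/// create() alongside create(const Context &), so wrapper code that has no Context at hand
/// (such as the cast wrappers below) can still construct them, e.g.:
///
///     FunctionPtr to_string = FunctionToString::create();
///     FunctionPtr to_fixed_string = FunctionToFixedString::create();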
- function = FunctionConvertFromString::create(context); + function = FunctionConvertFromString::create(); } else - function = FunctionTo::Type::create(context); + function = FunctionTo::Type::create(); auto function_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(function)) @@ -1752,7 +1773,7 @@ private: WrapperType createStringWrapper(const DataTypePtr & from_type) const { - FunctionPtr function = FunctionToString::create(context); + FunctionPtr function = FunctionToString::create(); auto function_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(function)) @@ -1780,7 +1801,7 @@ private: if (requested_result_is_nullable) throw Exception{"CAST AS Nullable(UUID) is not implemented", ErrorCodes::NOT_IMPLEMENTED}; - FunctionPtr function = FunctionTo::Type::create(context); + FunctionPtr function = FunctionTo::Type::create(); auto function_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(function)) @@ -1985,7 +2006,7 @@ private: return createStringToEnumWrapper(); else if (isNativeNumber(from_type) || isEnum(from_type)) { - auto function = Function::create(context); + auto function = Function::create(); auto func_or_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(function)) .build(ColumnsWithTypeAndName{{nullptr, from_type, "" }}); @@ -2337,9 +2358,10 @@ public: using MonotonicityForRange = FunctionCast::MonotonicityForRange; static constexpr auto name = "CAST"; - static FunctionOverloadResolverImplPtr create(const Context & context) { return std::make_unique(context); } + static FunctionOverloadResolverImplPtr create(const Context &) { return createImpl(); } + static FunctionOverloadResolverImplPtr createImpl() { return std::make_unique(); } - CastOverloadResolver(const Context & context_) : context(context_) {} + CastOverloadResolver() {} String getName() const override { return name; } @@ -2357,7 +2379,7 @@ protected: data_types[i] = arguments[i].type; auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique(context, name, std::move(monotonicity), data_types, return_type); + return std::make_unique(name, std::move(monotonicity), data_types, return_type); } DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override @@ -2418,8 +2440,6 @@ private: /// other types like Null, FixedString, Array and Tuple have no monotonicity defined return {}; } - - const Context & context; }; } diff --git a/dbms/src/Interpreters/castColumn.cpp b/dbms/src/Interpreters/castColumn.cpp index bbbf82a681a..dd281540b51 100644 --- a/dbms/src/Interpreters/castColumn.cpp +++ b/dbms/src/Interpreters/castColumn.cpp @@ -2,13 +2,14 @@ #include #include #include -#include +#include +#include namespace DB { -ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type, const Context & context) +ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type) { if (arg.type->equals(*type)) return arg.column; @@ -28,7 +29,7 @@ ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type } }; - FunctionOverloadResolverPtr func_builder_cast = FunctionFactory::instance().get("CAST", context); + FunctionOverloadResolverPtr func_builder_cast = std::make_shared(CastOverloadResolver::createImpl()); ColumnsWithTypeAndName arguments{ temporary_block.getByPosition(0), temporary_block.getByPosition(1) }; auto func_cast = func_builder_cast->build(arguments); @@ -37,4 +38,9 @@ ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type 
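/// Editorial sketch, not part of the patch: with the new two-argument overload a column can
/// be cast without a Context, while the old three-argument overload is kept and simply
/// ignores its Context argument. A hypothetical helper (names are illustrative;
/// DataTypeUInt64 comes from DataTypes/DataTypesNumber.h):
///
///     ColumnPtr toUInt64Column(const ColumnPtr & src, const DataTypePtr & src_type)
///     {
///         ColumnWithTypeAndName arg{src, src_type, "src"};   /// the column name is arbitrary here
///         return castColumn(arg, std::make_shared<DataTypeUInt64>());
///     }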
return temporary_block.getByPosition(2).column; } +ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type, const Context &) +{ + return castColumn(arg, type); +} + } diff --git a/dbms/src/Interpreters/castColumn.h b/dbms/src/Interpreters/castColumn.h index f8efd7bb1da..28914f34977 100644 --- a/dbms/src/Interpreters/castColumn.h +++ b/dbms/src/Interpreters/castColumn.h @@ -6,7 +6,7 @@ namespace DB { - +ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type); ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type, const Context & context); } diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index edb8d5c15f4..b7ff829922b 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -248,7 +248,7 @@ namespace DB void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk &res, std::shared_ptr &table, arrow::Status &read_status, const Block &header, - int &row_group_current, const Context &context, std::string format_name) + int &row_group_current, std::string format_name) { Columns columns_list; UInt64 num_rows = 0; @@ -389,7 +389,7 @@ namespace DB else column.column = std::move(read_column); - column.column = castColumn(column, column_type, context); + column.column = castColumn(column, column_type); column.type = column_type; num_rows = column.column->size(); columns_list.push_back(std::move(column.column)); diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 34b58a80091..720b4df47cc 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -39,7 +39,7 @@ namespace DB static void arrowTableToCHChunk(Chunk &res, std::shared_ptr &table, arrow::Status &read_status, const Block &header, - int &row_group_current, const Context &context, std::string format_name); + int &row_group_current, std::string format_name); }; } #endif diff --git a/dbms/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index 53e00d295f1..f49f521d474 100644 --- a/dbms/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -61,7 +61,6 @@ void registerInputFormatProcessorRowBinary(FormatFactory & factory) factory.registerInputFormatProcessor("RowBinary", []( ReadBuffer & buf, const Block & sample, - const Context &, const IRowInputFormat::Params & params, const FormatSettings &) { @@ -71,7 +70,6 @@ void registerInputFormatProcessorRowBinary(FormatFactory & factory) factory.registerInputFormatProcessor("RowBinaryWithNamesAndTypes", []( ReadBuffer & buf, const Block & sample, - const Context &, const IRowInputFormat::Params & params, const FormatSettings &) { diff --git a/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index 1bbdfbf93fc..726a9ca45e8 100644 --- a/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -52,7 +52,6 @@ void registerOutputFormatProcessorRowBinary(FormatFactory & factory) factory.registerOutputFormatProcessor("RowBinary", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings &) { 
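/// Editorial sketch, not part of the patch: after this change every creator lambda drops the
/// `const Context &` parameter and relies on FormatSettings alone. A hypothetical
/// registration ("SomeFormat" and SomeFormatRowOutputFormat are placeholder names):
///
///     factory.registerOutputFormatProcessor("SomeFormat", [](
///         WriteBuffer & buf,
///         const Block & sample,
///         FormatFactory::WriteCallback callback,
///         const FormatSettings & settings)
///     {
///         return std::make_shared<SomeFormatRowOutputFormat>(buf, sample, callback, settings);
///     });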
@@ -62,7 +61,6 @@ void registerOutputFormatProcessorRowBinary(FormatFactory & factory) factory.registerOutputFormatProcessor("RowBinaryWithNamesAndTypes", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings &) { diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 3dc373109be..e5920f33dc6 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -421,7 +421,6 @@ void registerInputFormatProcessorCSV(FormatFactory & factory) factory.registerInputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=]( ReadBuffer & buf, const Block & sample, - const Context &, IRowInputFormat::Params params, const FormatSettings & settings) { diff --git a/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp index 081dcf890a0..45962e9779c 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp @@ -76,7 +76,6 @@ void registerOutputFormatProcessorCSV(FormatFactory & factory) factory.registerOutputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=]( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index be4e6eaaf3f..afa9f4d4fa0 100644 --- a/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -301,10 +301,11 @@ void registerInputFormatProcessorCapnProto(FormatFactory & factory) { factory.registerInputFormatProcessor( "CapnProto", - [](ReadBuffer & buf, const Block & sample, const Context & context, IRowInputFormat::Params params, const FormatSettings &) + [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(context, context.getSettingsRef().format_schema, "CapnProto", true)); + FormatSchemaInfo(settings.schema.format_schema, "CapnProto", true, + settings.schema.is_server, settings.schema.format_schema_path)); }); } diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index d4530e7b09d..61a5f649ed1 100644 --- a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -217,7 +217,6 @@ void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory) factory.registerInputFormatProcessor("JSONCompactEachRow", []( ReadBuffer & buf, const Block & sample, - const Context &, IRowInputFormat::Params params, const FormatSettings & settings) { @@ -227,7 +226,6 @@ void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory) factory.registerInputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", []( ReadBuffer & buf, const Block & sample, - const Context &, IRowInputFormat::Params params, const FormatSettings & settings) { diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index 433cc4515ae..02b7cc220a0 100644 
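/// Editorial aside, not part of the patch: schema-based formats (CapnProto above, Protobuf
/// further down) now build FormatSchemaInfo from the schema fields of FormatSettings rather
/// than from the Context. A minimal sketch, assuming `settings` is the FormatSettings
/// passed to the creator:
///
///     FormatSchemaInfo schema_info(settings.schema.format_schema, "CapnProto", /* require_message = */ true,
///                                  settings.schema.is_server, settings.schema.format_schema_path);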
--- a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -94,7 +94,6 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory) factory.registerOutputFormatProcessor("JSONCompactEachRow", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { @@ -104,7 +103,6 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory) factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", []( WriteBuffer &buf, const Block &sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings &format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index 011054dfce7..7e56a4643da 100644 --- a/dbms/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -80,7 +80,6 @@ void registerOutputFormatProcessorJSONCompact(FormatFactory & factory) factory.registerOutputFormatProcessor("JSONCompact", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 1ffe50d87e6..a597af956b6 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -271,7 +271,6 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory) factory.registerInputFormatProcessor("JSONEachRow", []( ReadBuffer & buf, const Block & sample, - const Context &, IRowInputFormat::Params params, const FormatSettings & settings) { diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp index 513420d29b4..f862a07173b 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp @@ -56,7 +56,6 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory) factory.registerOutputFormatProcessor("JSONEachRow", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp index 33fc1c9920b..a611b5a129b 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp @@ -33,7 +33,6 @@ void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factor factory.registerOutputFormatProcessor("JSONEachRowWithProgress", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 90cff3f1498..c23f3812c0b 100644 --- a/dbms/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ 
-246,7 +246,6 @@ void registerOutputFormatProcessorJSON(FormatFactory & factory) factory.registerOutputFormatProcessor("JSON", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback callback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index f913087da9b..b04de32ca5a 100644 --- a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -12,13 +12,10 @@ namespace DB using namespace MySQLProtocol; -MySQLOutputFormat::MySQLOutputFormat(WriteBuffer & out_, const Block & header_, const Context & context_, const FormatSettings & settings_) +MySQLOutputFormat::MySQLOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) : IOutputFormat(header_, out_) - , context(context_) - , packet_sender(out, const_cast(context_.mysql.sequence_id)) /// TODO: fix it , format_settings(settings_) { - packet_sender.max_packet_size = context_.mysql.max_packet_size; } void MySQLOutputFormat::initialize() @@ -32,17 +29,17 @@ void MySQLOutputFormat::initialize() if (header.columns()) { - packet_sender.sendPacket(LengthEncodedNumber(header.columns())); + packet_sender->sendPacket(LengthEncodedNumber(header.columns())); for (size_t i = 0; i < header.columns(); i++) { const auto & column_name = header.getColumnsWithTypeAndName()[i].name; - packet_sender.sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); + packet_sender->sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); } - if (!(context.mysql.client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) + if (!(context->mysql.client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) { - packet_sender.sendPacket(EOF_Packet(0, 0)); + packet_sender->sendPacket(EOF_Packet(0, 0)); } } } @@ -53,7 +50,7 @@ void MySQLOutputFormat::consume(Chunk chunk) for (size_t i = 0; i < chunk.getNumRows(); i++) { ProtocolText::ResultsetRow row_packet(data_types, chunk.getColumns(), i); - packet_sender.sendPacket(row_packet); + packet_sender->sendPacket(row_packet); } } @@ -61,7 +58,7 @@ void MySQLOutputFormat::finalize() { size_t affected_rows = 0; std::stringstream human_readable_info; - if (QueryStatus * process_list_elem = context.getProcessListElement()) + if (QueryStatus * process_list_elem = context->getProcessListElement()) { CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(); @@ -74,17 +71,17 @@ void MySQLOutputFormat::finalize() const auto & header = getPort(PortKind::Main).getHeader(); if (header.columns() == 0) - packet_sender.sendPacket(OK_Packet(0x0, context.mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info.str()), true); + packet_sender->sendPacket(OK_Packet(0x0, context->mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info.str()), true); else - if (context.mysql.client_capabilities & CLIENT_DEPRECATE_EOF) - packet_sender.sendPacket(OK_Packet(0xfe, context.mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info.str()), true); + if (context->mysql.client_capabilities & CLIENT_DEPRECATE_EOF) + packet_sender->sendPacket(OK_Packet(0xfe, context->mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info.str()), true); else - packet_sender.sendPacket(EOF_Packet(0, 0), true); + packet_sender->sendPacket(EOF_Packet(0, 0), true); } void MySQLOutputFormat::flush() { - 
packet_sender.out->next(); + packet_sender->out->next(); } void registerOutputFormatProcessorMySQLWrite(FormatFactory & factory) @@ -93,9 +90,8 @@ void registerOutputFormatProcessorMySQLWrite(FormatFactory & factory) "MySQLWire", [](WriteBuffer & buf, const Block & sample, - const Context & context, FormatFactory::WriteCallback, - const FormatSettings & settings) { return std::make_shared(buf, sample, context, settings); }); + const FormatSettings & settings) { return std::make_shared(buf, sample, settings); }); } } diff --git a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h index 780a0c4ea05..2604e7fc42e 100644 --- a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h +++ b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h @@ -16,13 +16,20 @@ class Context; /** A stream for outputting data in a binary line-by-line format. */ -class MySQLOutputFormat: public IOutputFormat +class MySQLOutputFormat final : public IOutputFormat { public: - MySQLOutputFormat(WriteBuffer & out_, const Block & header_, const Context & context_, const FormatSettings & settings_); + MySQLOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_); String getName() const override { return "MySQLOutputFormat"; } + void setContext(const Context & context_) + { + context = &context_; + packet_sender = std::make_unique(out, const_cast(context_.mysql.sequence_id)); /// TODO: fix it + packet_sender->max_packet_size = context_.mysql.max_packet_size; + } + void consume(Chunk) override; void finalize() override; void flush() override; @@ -34,8 +41,8 @@ private: bool initialized = false; - const Context & context; - MySQLProtocol::PacketSender packet_sender; + const Context * context = nullptr; + std::unique_ptr packet_sender; FormatSettings format_settings; DataTypes data_types; }; diff --git a/dbms/src/Processors/Formats/Impl/NativeFormat.cpp b/dbms/src/Processors/Formats/Impl/NativeFormat.cpp index 846d1c3dbc6..bbc0d05272b 100644 --- a/dbms/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/NativeFormat.cpp @@ -156,7 +156,6 @@ void registerInputFormatProcessorNative(FormatFactory & factory) factory.registerInputFormatProcessor("Native", []( ReadBuffer & buf, const Block & sample, - const Context &, const RowInputFormatParams &, const FormatSettings &) { @@ -169,7 +168,6 @@ void registerOutputFormatProcessorNative(FormatFactory & factory) factory.registerOutputFormatProcessor("Native", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings &) { diff --git a/dbms/src/Processors/Formats/Impl/NullFormat.cpp b/dbms/src/Processors/Formats/Impl/NullFormat.cpp index cf1dc7186ab..c8c773e7acf 100644 --- a/dbms/src/Processors/Formats/Impl/NullFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/NullFormat.cpp @@ -21,7 +21,6 @@ void registerOutputFormatProcessorNull(FormatFactory & factory) factory.registerOutputFormatProcessor("Null", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings &) { diff --git a/dbms/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index 9ada3d16516..5cad02cf2cf 100644 --- a/dbms/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -107,7 +107,7 @@ void ODBCDriver2BlockOutputFormat::writePrefix() void 
registerOutputFormatProcessorODBCDriver2(FormatFactory & factory) { factory.registerOutputFormatProcessor( - "ODBCDriver2", [](WriteBuffer & buf, const Block & sample, const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) + "ODBCDriver2", [](WriteBuffer & buf, const Block & sample, FormatFactory::WriteCallback, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings); }); diff --git a/dbms/src/Processors/Formats/Impl/ODBCDriverBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/ODBCDriverBlockOutputFormat.cpp index 467bfb614e3..eaccc47346d 100644 --- a/dbms/src/Processors/Formats/Impl/ODBCDriverBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ODBCDriverBlockOutputFormat.cpp @@ -69,7 +69,6 @@ void registerOutputFormatProcessorODBCDriver(FormatFactory & factory) factory.registerOutputFormatProcessor("ODBCDriver", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index c368e979495..6049420cac0 100644 --- a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -12,8 +12,8 @@ namespace DB { - ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer &in_, Block header_, const Context &context_) - : IInputFormat(std::move(header_), in_), context{context_} { + ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer &in_, Block header_) : IInputFormat(std::move(header_), in_) + { } Chunk ORCBlockInputFormat::generate() @@ -57,7 +57,7 @@ namespace DB arrow::Status read_status = file_reader->Read(&table); - ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, context, "ORC"); + ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, "ORC"); return res; } @@ -78,11 +78,10 @@ namespace DB "ORC", [](ReadBuffer &buf, const Block &sample, - const Context &context, const RowInputFormatParams &, const FormatSettings & /* settings */) { - return std::make_shared(buf, sample, context); + return std::make_shared(buf, sample); }); } diff --git a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 331d60af9dd..c0c688f0158 100644 --- a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -17,7 +17,7 @@ class Context; class ORCBlockInputFormat: public IInputFormat { public: - ORCBlockInputFormat(ReadBuffer & in_, Block header_, const Context & context_); + ORCBlockInputFormat(ReadBuffer & in_, Block header_); String getName() const override { return "ORCBlockInputFormat"; } @@ -30,8 +30,6 @@ private: // TODO: check that this class implements every part of its parent - const Context & context; - std::unique_ptr file_reader; std::string file_data; int row_group_total = 0; diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 55c2ebccf41..3c9b50c2c75 100644 --- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -15,8 +15,9 @@ namespace DB { - ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer &in_, Block header_, const Context &context_) - : IInputFormat(std::move(header_), 
in_), context{context_} { + ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_) + : IInputFormat(std::move(header_), in_) + { } Chunk ParquetBlockInputFormat::generate() @@ -59,7 +60,7 @@ namespace DB std::shared_ptr table; arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, &table); - ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, context, "Parquet"); + ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, "Parquet"); return res; } @@ -80,11 +81,10 @@ namespace DB "Parquet", [](ReadBuffer &buf, const Block &sample, - const Context &context, const RowInputFormatParams &, const FormatSettings & /* settings */) { - return std::make_shared(buf, sample, context); + return std::make_shared(buf, sample); }); } diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 94489f8fbaa..665665557a5 100644 --- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -16,7 +16,7 @@ class Context; class ParquetBlockInputFormat: public IInputFormat { public: - ParquetBlockInputFormat(ReadBuffer & in_, Block header_, const Context & context_); + ParquetBlockInputFormat(ReadBuffer & in_, Block header_); void resetParser() override; @@ -30,8 +30,6 @@ private: // TODO: check that this class implements every part of its parent - const Context & context; - std::unique_ptr file_reader; std::string file_data; std::unique_ptr buffer; diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index f7092bc9a5b..855036e05ff 100644 --- a/dbms/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -426,7 +426,6 @@ void registerOutputFormatProcessorParquet(FormatFactory & factory) "Parquet", [](WriteBuffer & buf, const Block & sample, - const Context & /*context*/, FormatFactory::WriteCallback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 5a0a43f487a..c0b1b2e4ffc 100644 --- a/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -260,7 +260,6 @@ void registerOutputFormatProcessorPretty(FormatFactory & factory) factory.registerOutputFormatProcessor("Pretty", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { @@ -270,7 +269,6 @@ void registerOutputFormatProcessorPretty(FormatFactory & factory) factory.registerOutputFormatProcessor("PrettyNoEscapes", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 10d475f599a..84e222d073c 100644 --- a/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -134,7 +134,6 @@ void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory) factory.registerOutputFormatProcessor("PrettyCompact", []( WriteBuffer & buf, const 
Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { @@ -144,7 +143,6 @@ void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory) factory.registerOutputFormatProcessor("PrettyCompactNoEscapes", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { @@ -157,7 +155,6 @@ void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory) // factory.registerOutputFormat("PrettyCompactMonoBlock", []( // WriteBuffer & buf, // const Block & sample, -// const Context &, // const FormatSettings & format_settings) // { // BlockOutputStreamPtr impl = std::make_shared(buf, sample, format_settings); diff --git a/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 85655a33004..8b852b3d194 100644 --- a/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -97,7 +97,6 @@ void registerOutputFormatProcessorPrettySpace(FormatFactory & factory) factory.registerOutputFormatProcessor("PrettySpace", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { @@ -107,7 +106,6 @@ void registerOutputFormatProcessorPrettySpace(FormatFactory & factory) factory.registerOutputFormatProcessor("PrettySpaceNoEscapes", []( WriteBuffer & buf, const Block & sample, - const Context &, FormatFactory::WriteCallback, const FormatSettings & format_settings) { diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 6fce4437219..50d9cef9bfc 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -70,12 +70,12 @@ void registerInputFormatProcessorProtobuf(FormatFactory & factory) factory.registerInputFormatProcessor("Protobuf", []( ReadBuffer & buf, const Block & sample, - const Context & context, IRowInputFormat::Params params, - const FormatSettings &) + const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(context, context.getSettingsRef().format_schema, "Protobuf", true)); + FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, + settings.schema.is_server, settings.schema.format_schema_path)); }); } diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index d1007492203..da567e60475 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -50,12 +50,12 @@ void registerOutputFormatProcessorProtobuf(FormatFactory & factory) "Protobuf", [](WriteBuffer & buf, const Block & header, - const Context & context, FormatFactory::WriteCallback callback, - const FormatSettings &) + const FormatSettings & settings) { - return std::make_shared(buf, header, callback, - FormatSchemaInfo(context, context.getSettingsRef().format_schema, "Protobuf", true)); + return std::make_shared(buf, header, std::move(callback), + FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, + settings.schema.is_server, settings.schema.format_schema_path)); }); } diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp 
b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
index dcc7d64fcd2..1e4fa511983 100644
--- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@@ -210,7 +210,6 @@ void registerInputFormatProcessorTSKV(FormatFactory & factory)
     factory.registerInputFormatProcessor("TSKV", [](
         ReadBuffer & buf,
         const Block & sample,
-        const Context &,
         IRowInputFormat::Params params,
         const FormatSettings & settings)
     {
diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp
index f1fbc72b9ee..a8f1acb59bb 100644
--- a/dbms/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp
@@ -45,7 +45,6 @@ void registerOutputFormatProcessorTSKV(FormatFactory & factory)
     factory.registerOutputFormatProcessor("TSKV", [](
         WriteBuffer & buf,
         const Block & sample,
-        const Context &,
         FormatFactory::WriteCallback callback,
         const FormatSettings & settings)
     {
diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index b578a2c07ce..d20d7802d5c 100644
--- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -356,7 +356,6 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerInputFormatProcessor(name, [](
             ReadBuffer & buf,
             const Block & sample,
-            const Context &,
             IRowInputFormat::Params params,
             const FormatSettings & settings)
         {
@@ -369,7 +368,6 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerInputFormatProcessor(name, [](
             ReadBuffer & buf,
             const Block & sample,
-            const Context &,
             IRowInputFormat::Params params,
             const FormatSettings & settings)
         {
@@ -382,7 +380,6 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerInputFormatProcessor(name, [](
             ReadBuffer & buf,
             const Block & sample,
-            const Context &,
             IRowInputFormat::Params params,
             const FormatSettings & settings)
         {
diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp
index ae3a1fb9af0..0acbbdcdfbe 100644
--- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp
@@ -78,7 +78,6 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerOutputFormatProcessor(name, [](
             WriteBuffer & buf,
             const Block & sample,
-            const Context &,
             FormatFactory::WriteCallback callback,
             const FormatSettings & settings)
         {
@@ -91,7 +90,6 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerOutputFormatProcessor(name, [](
             WriteBuffer & buf,
             const Block & sample,
-            const Context &,
             FormatFactory::WriteCallback callback,
             const FormatSettings & settings)
         {
@@ -104,7 +102,6 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerOutputFormatProcessor(name, [](
             WriteBuffer & buf,
             const Block & sample,
-            const Context &,
             FormatFactory::WriteCallback callback,
             const FormatSettings & settings)
         {
@@ -117,7 +114,6 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
         factory.registerOutputFormatProcessor(name, [](
             WriteBuffer & buf,
             const Block & sample,
-            const Context &,
             FormatFactory::WriteCallback callback,
             const FormatSettings & settings)
         {
diff --git a/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp
index aa7bfbeaff1..bd49b25934e 100644
--- a/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp
@@ -14,8 +14,10 @@ namespace ErrorCodes
 }
 
 TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_,
-                                                     ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_)
-    : IOutputFormat(header_, out_), settings(settings_), format(std::move(format_)), row_format(std::move(row_format_))
+                                                     ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
+                                                     std::string row_between_delimiter_)
+    : IOutputFormat(header_, out_), settings(settings_), format(std::move(format_))
+    , row_format(std::move(row_format_)), row_between_delimiter(std::move(row_between_delimiter_))
 {
     auto & sample = getPort(PortKind::Main).getHeader();
     size_t columns = sample.columns();
@@ -152,7 +154,7 @@ void TemplateBlockOutputFormat::consume(Chunk chunk)
     for (size_t i = 0; i < rows; ++i)
     {
         if (row_count)
-            writeString(settings.template_settings.row_between_delimiter, out);
+            writeString(row_between_delimiter, out);
 
         writeRow(chunk, i);
         ++row_count;
@@ -230,7 +232,6 @@ void registerOutputFormatProcessorTemplate(FormatFactory & factory)
     factory.registerOutputFormatProcessor("Template", [](
         WriteBuffer & buf,
         const Block & sample,
-        const Context & context,
         FormatFactory::WriteCallback,
         const FormatSettings & settings)
     {
@@ -247,7 +248,8 @@
         {
             /// Read format string from file
            resultset_format = ParsedTemplateFormatString(
-                    FormatSchemaInfo(context, settings.template_settings.resultset_format, "Template", false),
+                    FormatSchemaInfo(settings.template_settings.resultset_format, "Template", false,
+                        settings.schema.is_server, settings.schema.format_schema_path),
                    [&](const String & partName)
                    {
                        return static_cast<size_t>(TemplateBlockOutputFormat::stringToResultsetPart(partName));
@@ -255,28 +257,26 @@
         }
 
         ParsedTemplateFormatString row_format = ParsedTemplateFormatString(
-                FormatSchemaInfo(context, settings.template_settings.row_format, "Template", false),
+                FormatSchemaInfo(settings.template_settings.row_format, "Template", false,
+                    settings.schema.is_server, settings.schema.format_schema_path),
                [&](const String & colName)
                {
                    return sample.getPositionByName(colName);
                });
 
-        return std::make_shared<TemplateBlockOutputFormat>(sample, buf, settings, resultset_format, row_format);
+        return std::make_shared<TemplateBlockOutputFormat>(sample, buf, settings, resultset_format, row_format, settings.template_settings.row_between_delimiter);
     });
 
     factory.registerOutputFormatProcessor("CustomSeparated", [](
         WriteBuffer & buf,
         const Block & sample,
-        const Context & context,
         FormatFactory::WriteCallback,
         const FormatSettings & settings)
     {
-        ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(context);
-        ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(context, sample);
-        FormatSettings format_settings = settings;
-        format_settings.template_settings.row_between_delimiter = context.getSettingsRef().format_custom_row_between_delimiter;
+        ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(settings.custom);
+        ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(settings.custom, sample);
 
-        return std::make_shared<TemplateBlockOutputFormat>(sample, buf, format_settings, resultset_format, row_format);
+        return std::make_shared<TemplateBlockOutputFormat>(sample, buf, settings, resultset_format, row_format, settings.custom.row_between_delimiter);
     });
 }
 }
diff --git a/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h b/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h
index 25a6a832bc8..f29d31eb3f1 100644
--- a/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h
+++ b/dbms/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h
@@ -15,7 +15,8 @@ class TemplateBlockOutputFormat : public IOutputFormat
     using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
 
 public:
     TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_,
-                              ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_);
+                              ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
+                              std::string row_between_delimiter_);
 
     String getName() const override { return "TemplateBlockOutputFormat"; }
@@ -65,6 +66,8 @@ protected:
 
     size_t row_count = 0;
     bool need_write_prefix = true;
+
+    std::string row_between_delimiter;
 };
 
 }
diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
index d4de40eddec..9b007d9f29b 100644
--- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@@ -20,12 +20,13 @@ extern const int SYNTAX_ERROR;
 
 
 TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
-                                               const FormatSettings & settings_, bool ignore_spaces_,
-                                               ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_)
+                                               FormatSettings settings_, bool ignore_spaces_,
+                                               ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
+                                               std::string row_between_delimiter_)
     : RowInputFormatWithDiagnosticInfo(header_, buf, params_), buf(in_), data_types(header_.getDataTypes()),
-    settings(settings_), ignore_spaces(ignore_spaces_),
+    settings(std::move(settings_)), ignore_spaces(ignore_spaces_),
     format(std::move(format_)), row_format(std::move(row_format_)),
-    default_csv_delimiter(settings.csv.delimiter)
+    default_csv_delimiter(settings.csv.delimiter), row_between_delimiter(std::move(row_between_delimiter_))
 {
     /// Validate format string for result set
     bool has_data = false;
@@ -160,7 +161,7 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
     updateDiagnosticInfo();
 
     if (likely(row_num != 1))
-        assertString(settings.template_settings.row_between_delimiter, buf);
+        assertString(row_between_delimiter, buf);
 
     extra.read_columns.assign(columns.size(), false);
 
@@ -339,11 +340,11 @@ bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & col
     try
     {
         if (likely(row_num != 1))
-            assertString(settings.template_settings.row_between_delimiter, buf);
+            assertString(row_between_delimiter, buf);
     }
     catch (const DB::Exception &)
     {
-        writeErrorStringForWrongDelimiter(out, "delimiter between rows", settings.template_settings.row_between_delimiter);
+        writeErrorStringForWrongDelimiter(out, "delimiter between rows", row_between_delimiter);
 
         return false;
     }
@@ -428,7 +429,7 @@ bool TemplateRowInputFormat::isGarbageAfterField(size_t, ReadBuffer::Position)
 
 bool TemplateRowInputFormat::allowSyncAfterError() const
 {
-    return !row_format.delimiters.back().empty() || !settings.template_settings.row_between_delimiter.empty();
+    return !row_format.delimiters.back().empty() || !row_between_delimiter.empty();
 }
 
 void TemplateRowInputFormat::syncAfterError()
@@ -450,10 +451,10 @@ void TemplateRowInputFormat::syncAfterError()
 
     bool last_delimiter_in_row_found = !row_format.delimiters.back().empty();
 
-    if (last_delimiter_in_row_found && checkString(settings.template_settings.row_between_delimiter, buf))
+    if (last_delimiter_in_row_found && checkString(row_between_delimiter, buf))
         at_beginning_of_row_or_eof = true;
     else
-        skipToNextDelimiterOrEof(settings.template_settings.row_between_delimiter);
+        skipToNextDelimiterOrEof(row_between_delimiter);
 
     if (buf.eof())
         at_beginning_of_row_or_eof = end_of_stream = true;
@@ -509,7 +510,6 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory)
         factory.registerInputFormatProcessor(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [=](
             ReadBuffer & buf,
             const Block & sample,
-            const Context & context,
             IRowInputFormat::Params params,
             const FormatSettings & settings)
         {
@@ -526,7 +526,8 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory)
             {
                 /// Read format string from file
                resultset_format = ParsedTemplateFormatString(
-                        FormatSchemaInfo(context, settings.template_settings.resultset_format, "Template", false),
+                        FormatSchemaInfo(settings.template_settings.resultset_format, "Template", false,
+                            settings.schema.is_server, settings.schema.format_schema_path),
                        [&](const String & partName) -> std::optional<size_t>
                        {
                            if (partName == "data")
@@ -537,13 +538,14 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory)
             }
 
             ParsedTemplateFormatString row_format = ParsedTemplateFormatString(
-                    FormatSchemaInfo(context, settings.template_settings.row_format, "Template", false),
+                    FormatSchemaInfo(settings.template_settings.row_format, "Template", false,
+                        settings.schema.is_server, settings.schema.format_schema_path),
                    [&](const String & colName) -> std::optional<size_t>
                    {
                        return sample.getPositionByName(colName);
                    });
 
-            return std::make_shared<TemplateRowInputFormat>(sample, buf, params, settings, ignore_spaces, resultset_format, row_format);
+            return std::make_shared<TemplateRowInputFormat>(sample, buf, params, settings, ignore_spaces, resultset_format, row_format, settings.template_settings.row_between_delimiter);
         });
     }
 
@@ -552,16 +554,13 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory)
         factory.registerInputFormatProcessor(ignore_spaces ? "CustomSeparatedIgnoreSpaces" : "CustomSeparated", [=](
             ReadBuffer & buf,
             const Block & sample,
-            const Context & context,
             IRowInputFormat::Params params,
             const FormatSettings & settings)
         {
-            ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(context);
-            ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(context, sample);
-            FormatSettings format_settings = settings;
-            format_settings.template_settings.row_between_delimiter = context.getSettingsRef().format_custom_row_between_delimiter;
+            ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(settings.custom);
+            ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(settings.custom, sample);
 
-            return std::make_shared<TemplateRowInputFormat>(sample, buf, params, format_settings, ignore_spaces, resultset_format, row_format);
+            return std::make_shared<TemplateRowInputFormat>(sample, buf, params, settings, ignore_spaces, resultset_format, row_format, settings.custom.row_between_delimiter);
         });
     }
 }
diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h
index a9180c52606..4a39396399d 100644
--- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h
+++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h
@@ -16,8 +16,9 @@ class TemplateRowInputFormat : public RowInputFormatWithDiagnosticInfo
     using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
 
 public:
     TemplateRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
-                           const FormatSettings & settings_, bool ignore_spaces_,
-                           ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_);
+                           FormatSettings settings_, bool ignore_spaces_,
+                           ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
+                           std::string row_between_delimiter);
 
     String getName() const override { return "TemplateRowInputFormat"; }
@@ -61,6 +62,8 @@ private:
     bool end_of_stream = false;
     std::vector<size_t> always_default_columns;
     char default_csv_delimiter;
+
+    std::string row_between_delimiter;
 };
 
 }
diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
index c42b638fb48..b324719527a 100644
--- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@@ -33,8 +33,8 @@ namespace ErrorCodes
 
 
 ValuesBlockInputFormat::ValuesBlockInputFormat(ReadBuffer & in_, const Block & header_, const RowInputFormatParams & params_,
-                                               const Context & context_, const FormatSettings & format_settings_)
-    : IInputFormat(header_, buf), buf(in_), params(params_), context(std::make_unique<Context>(context_)),
+                                               const FormatSettings & format_settings_)
+    : IInputFormat(header_, buf), buf(in_), params(params_),
       format_settings(format_settings_), num_columns(header_.columns()),
       parser_type_for_column(num_columns, ParserType::Streaming),
       attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns),
@@ -424,11 +424,10 @@ void registerInputFormatProcessorValues(FormatFactory & factory)
     factory.registerInputFormatProcessor("Values", [](
         ReadBuffer & buf,
         const Block & header,
-        const Context & context,
         const RowInputFormatParams & params,
         const FormatSettings & settings)
     {
-        return std::make_shared<ValuesBlockInputFormat>(buf, header, params, context, settings);
+        return std::make_shared<ValuesBlockInputFormat>(buf, header, params, settings);
     });
 }
diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
index 9c2473a0a04..059a15e1e86 100644
--- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
+++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
@@ -18,7 +18,7 @@ class ReadBuffer;
 
 /** Stream to read data in VALUES format (as in INSERT query).
   */
-class ValuesBlockInputFormat : public IInputFormat
+class ValuesBlockInputFormat final : public IInputFormat
 {
 public:
     /** Data is parsed using fast, streaming parser.
@@ -29,12 +29,15 @@ public:
       * than interpreting expressions in each row separately, but it's still slower than streaming parsing)
       */
     ValuesBlockInputFormat(ReadBuffer & in_, const Block & header_, const RowInputFormatParams & params_,
-                           const Context & context_, const FormatSettings & format_settings_);
+                           const FormatSettings & format_settings_);
 
     String getName() const override { return "ValuesBlockInputFormat"; }
 
     void resetParser() override;
 
+    /// TODO: remove context somehow.
+    void setContext(const Context & context_) { context = std::make_unique<Context>(context_); }
+
     const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
 
 private:
diff --git a/dbms/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp
index e773cbf167c..6294a829456 100644
--- a/dbms/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp
@@ -46,7 +46,6 @@ void registerOutputFormatProcessorValues(FormatFactory & factory)
     factory.registerOutputFormatProcessor("Values", [](
         WriteBuffer & buf,
         const Block & sample,
-        const Context &,
         FormatFactory::WriteCallback callback,
         const FormatSettings & settings)
     {
diff --git a/dbms/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp
index 6e794c9a79b..ccef2a0898a 100644
--- a/dbms/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp
@@ -168,7 +168,6 @@ void registerOutputFormatProcessorVertical(FormatFactory & factory)
     factory.registerOutputFormatProcessor("Vertical", [](
         WriteBuffer & buf,
         const Block & sample,
-        const Context &,
         FormatFactory::WriteCallback callback,
         const FormatSettings & settings)
     {
diff --git a/dbms/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp
index 07335fc6c49..9c2384691f6 100644
--- a/dbms/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp
@@ -245,7 +245,6 @@ void registerOutputFormatProcessorXML(FormatFactory & factory)
     factory.registerOutputFormatProcessor("XML", [](
         WriteBuffer & buf,
         const Block & sample,
-        const Context &,
         FormatFactory::WriteCallback callback,
         const FormatSettings & settings)
     {