diff --git a/dbms/include/DB/Databases/DatabaseCloud.h b/dbms/include/DB/Databases/DatabaseCloud.h
index c928cc324b3..27e787ac390 100644
--- a/dbms/include/DB/Databases/DatabaseCloud.h
+++ b/dbms/include/DB/Databases/DatabaseCloud.h
@@ -83,7 +83,7 @@ public:
 
     String getEngineName() const override { return "Cloud"; }
 
-    void loadTables(Context & context, ThreadPool * thread_pool) override;
+    void loadTables(Context & context, ThreadPool * thread_pool, bool has_force_restore_data_flag) override;
 
     bool isTableExist(const String & table_name) const override;
     StoragePtr tryGetTable(const String & table_name) override;
diff --git a/dbms/include/DB/Databases/DatabaseOrdinary.h b/dbms/include/DB/Databases/DatabaseOrdinary.h
index 71531667b59..c08b704872c 100644
--- a/dbms/include/DB/Databases/DatabaseOrdinary.h
+++ b/dbms/include/DB/Databases/DatabaseOrdinary.h
@@ -25,7 +25,7 @@ public:
 
     String getEngineName() const override { return "Ordinary"; }
 
-    void loadTables(Context & context, ThreadPool * thread_pool) override;
+    void loadTables(Context & context, ThreadPool * thread_pool, bool has_force_restore_data_flag) override;
 
     bool isTableExist(const String & table_name) const override;
     StoragePtr tryGetTable(const String & table_name) override;
diff --git a/dbms/include/DB/Databases/DatabasesCommon.h b/dbms/include/DB/Databases/DatabasesCommon.h
index b91b79813b8..b60b42e3d84 100644
--- a/dbms/include/DB/Databases/DatabasesCommon.h
+++ b/dbms/include/DB/Databases/DatabasesCommon.h
@@ -27,6 +27,7 @@ std::pair<String, StoragePtr> createTableFromDefinition(
     const String & database_name,
     const String & database_data_path,
     Context & context,
+    bool has_force_restore_data_flag,
     const String & description_for_error_message);
 
diff --git a/dbms/include/DB/Databases/IDatabase.h b/dbms/include/DB/Databases/IDatabase.h
index 70ed04e2bca..39fcc02b22e 100644
--- a/dbms/include/DB/Databases/IDatabase.h
+++ b/dbms/include/DB/Databases/IDatabase.h
@@ -43,7 +43,7 @@ public:
     /// Load the set of existing tables. If thread_pool is specified, use it.
     /// May be called only once, right after the object is created.
-    virtual void loadTables(Context & context, ThreadPool * thread_pool) = 0;
+    virtual void loadTables(Context & context, ThreadPool * thread_pool, bool has_force_restore_data_flag) = 0;
 
     /// Check the existence of the table.
     virtual bool isTableExist(const String & name) const = 0;
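Note on the interface change above: loadTables is a pure virtual member of IDatabase, so every database engine has to be migrated to the new signature in the same commit. Because all implementations mark the method override, a stale signature fails to compile instead of silently becoming an unused overload. A minimal self-contained sketch of that effect (the names here are hypothetical, not from this patch):

struct IDatabaseLike
{
    virtual void loadTables(bool has_force_restore_data_flag) = 0;
    virtual ~IDatabaseLike() = default;
};

struct DatabaseStub : IDatabaseLike
{
    /// If this kept the old parameter list, 'override' would turn the mismatch
    /// into a compile error rather than an accidental, never-called overload.
    void loadTables(bool /*has_force_restore_data_flag*/) override {}
};

int main()
{
    DatabaseStub db;
    db.loadTables(false);
    return 0;
}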
diff --git a/dbms/include/DB/Interpreters/InterpreterCreateQuery.h b/dbms/include/DB/Interpreters/InterpreterCreateQuery.h
index 653e10be9eb..51ad2532f5c 100644
--- a/dbms/include/DB/Interpreters/InterpreterCreateQuery.h
+++ b/dbms/include/DB/Interpreters/InterpreterCreateQuery.h
@@ -13,7 +13,8 @@ namespace DB
 
 class ASTCreateQuery;
 
-/** Позволяет создать новую таблицу, или создать объект уже существующей таблицы, или создать БД, или создать объект уже существующей БД.
+/** Allows creating a new table or database,
+  * or creating an object for an already existing table or database.
   */
 class InterpreterCreateQuery : public IInterpreter
 {
@@ -22,7 +23,7 @@ public:
 
     BlockIO execute() override;
 
-    /// Список столбцов с типами в AST.
+    /// List of columns and their types in AST form.
     static ASTPtr formatColumns(const NamesAndTypesList & columns);
     static ASTPtr formatColumns(
         NamesAndTypesList columns,
@@ -35,6 +36,11 @@ public:
         thread_pool = &thread_pool_;
     }
 
+    void setForceRestoreData(bool has_force_restore_data_flag_)
+    {
+        has_force_restore_data_flag = has_force_restore_data_flag_;
+    }
+
     struct ColumnsInfo
     {
         NamesAndTypesListPtr columns = std::make_shared<NamesAndTypesList>();
@@ -43,22 +49,25 @@ public:
         ColumnDefaults column_defaults;
     };
 
-    /// Получить информацию о столбцах и типах их default-ов, для случая, когда столбцы в запросе create указаны явно.
+    /// Obtain information about columns, their types and default values, for the case when columns in the CREATE query are specified explicitly.
     static ColumnsInfo getColumnsInfo(const ASTPtr & columns, const Context & context);
 
 private:
     void createDatabase(ASTCreateQuery & create);
     BlockIO createTable(ASTCreateQuery & create);
 
-    /// Вычислить список столбцов таблицы и вернуть его.
+    /// Calculate the list of table columns and return it.
     ColumnsInfo setColumns(ASTCreateQuery & create, const Block & as_select_sample, const StoragePtr & as_storage) const;
     String setEngine(ASTCreateQuery & create, const StoragePtr & as_storage) const;
 
     ASTPtr query_ptr;
     Context context;
 
-    /// Используется при загрузке базы данных.
+    /// Used while loading the database.
     ThreadPool * thread_pool = nullptr;
+
+    /// Skip safety thresholds when loading tables.
+    bool has_force_restore_data_flag = false;
 };
diff --git a/dbms/include/DB/Storages/StorageFactory.h b/dbms/include/DB/Storages/StorageFactory.h
index 33d71fdb32f..a30e5f5357f 100644
--- a/dbms/include/DB/Storages/StorageFactory.h
+++ b/dbms/include/DB/Storages/StorageFactory.h
@@ -27,7 +27,8 @@ public:
         const NamesAndTypesList & materialized_columns,
         const NamesAndTypesList & alias_columns,
         const ColumnDefaults & column_defaults,
-        bool attach) const;
+        bool attach,
+        bool has_force_restore_data_flag) const;
 };
 
 }
diff --git a/dbms/include/DB/Storages/StorageMergeTree.h b/dbms/include/DB/Storages/StorageMergeTree.h
index e196da57132..8a9aec7bd5a 100644
--- a/dbms/include/DB/Storages/StorageMergeTree.h
+++ b/dbms/include/DB/Storages/StorageMergeTree.h
@@ -41,6 +41,7 @@ public:
         const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported.
         size_t index_granularity_,
         const MergeTreeData::MergingParams & merging_params_,
+        bool has_force_restore_data_flag,
         const MergeTreeSettings & settings_);
 
     void shutdown() override;
@@ -183,6 +184,7 @@ private:
         const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported.
         size_t index_granularity_,
         const MergeTreeData::MergingParams & merging_params_,
+        bool has_force_restore_data_flag,
         const MergeTreeSettings & settings_);
 
     /** Determines which parts should be merged, and merges them.
diff --git a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h
index 12cf6101f40..34a1dd8645b 100644
--- a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h
+++ b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h
@@ -87,6 +87,7 @@ public:
         const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported.
         size_t index_granularity_,
         const MergeTreeData::MergingParams & merging_params_,
+        bool has_force_restore_data_flag,
         const MergeTreeSettings & settings_);
 
     void shutdown() override;
@@ -318,6 +319,7 @@ private:
         const ASTPtr & sampling_expression_,
         size_t index_granularity_,
         const MergeTreeData::MergingParams & merging_params_,
+        bool has_force_restore_data_flag,
         const MergeTreeSettings & settings_);
 
     /// Initialization.
diff --git a/dbms/src/Databases/DatabaseCloud.cpp b/dbms/src/Databases/DatabaseCloud.cpp
index a062cc45108..8a2335d075b 100644
--- a/dbms/src/Databases/DatabaseCloud.cpp
+++ b/dbms/src/Databases/DatabaseCloud.cpp
@@ -94,7 +94,7 @@ DatabaseCloud::DatabaseCloud(
 }
 
 
-void loadTables(Context & context, ThreadPool * thread_pool)
+void loadTables(Context & context, ThreadPool * thread_pool, bool has_force_restore_data_flag)
 {
     /// Do nothing: all tables are loaded lazily.
 }
@@ -411,7 +411,7 @@ StoragePtr DatabaseCloud::tryGetTable(const String & table_name)
             String table_name;
             StoragePtr table;
             std::tie(table_name, table) = createTableFromDefinition(
-                definition, name, data_path, context,
+                definition, name, data_path, context, false,
                 "in zookeeper node " + zookeeper_path + "/table_definitions/" + hashToHex(table_hash));
 
             local_tables_cache.emplace(table_name, table);
diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp
index be6ae6cf87b..dfd1b1b8323 100644
--- a/dbms/src/Databases/DatabaseOrdinary.cpp
+++ b/dbms/src/Databases/DatabaseOrdinary.cpp
@@ -40,7 +40,8 @@ static void loadTable(
     DatabaseOrdinary & database,
     const String & database_name,
     const String & database_data_path,
-    const String & file_name)
+    const String & file_name,
+    bool has_force_restore_data_flag)
 {
     Logger * log = &Logger::get("loadTable");
 
@@ -68,7 +69,7 @@ static void loadTable(
         String table_name;
         StoragePtr table;
         std::tie(table_name, table) = createTableFromDefinition(
-            s, database_name, database_data_path, context, "in file " + table_metadata_path);
+            s, database_name, database_data_path, context, has_force_restore_data_flag, "in file " + table_metadata_path);
         database.attachTable(table_name, table);
     }
     catch (const Exception & e)
@@ -87,7 +88,7 @@ DatabaseOrdinary::DatabaseOrdinary(
 }
 
 
-void DatabaseOrdinary::loadTables(Context & context, ThreadPool * thread_pool)
+void DatabaseOrdinary::loadTables(Context & context, ThreadPool * thread_pool, bool has_force_restore_data_flag)
 {
     log = &Logger::get("DatabaseOrdinary (" + name + ")");
 
@@ -97,7 +98,7 @@ void DatabaseOrdinary::loadTables(Context & context, ThreadPool * thread_pool)
     Poco::DirectoryIterator dir_end;
     for (Poco::DirectoryIterator dir_it(path); dir_it != dir_end; ++dir_it)
     {
-        /// Для директории .svn и файла .gitignore
+        /// Skip hidden entries such as the '.svn' directory and the '.gitignore' file.
        if (dir_it.name().at(0) == '.')
            continue;
 
@@ -149,7 +150,7 @@ void DatabaseOrdinary::loadTables(Context & context, ThreadPool * thread_pool)
             watch.restart();
         }
 
-        loadTable(context, path, *this, name, data_path, table);
+        loadTable(context, path, *this, name, data_path, table, has_force_restore_data_flag);
     }
 };
diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp
index 0ace8733d84..03a0cb0d006 100644
--- a/dbms/src/Databases/DatabasesCommon.cpp
+++ b/dbms/src/Databases/DatabasesCommon.cpp
@@ -42,6 +42,7 @@ std::pair<String, StoragePtr> createTableFromDefinition(
     const String & database_name,
     const String & database_data_path,
     Context & context,
+    bool has_force_restore_data_flag,
     const String & description_for_error_message)
 {
     ParserCreateQuery parser;
@@ -73,7 +74,7 @@ std::pair<String, StoragePtr> createTableFromDefinition(
             storage_name, database_data_path,
             ast_create_query.table, database_name, context, context.getGlobalContext(), ast, columns_info.columns,
             columns_info.materialized_columns, columns_info.alias_columns, columns_info.column_defaults,
-            true)
+            true, has_force_restore_data_flag)
     };
 }
diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
index 2e2208541c0..9f95da151f3 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@@ -134,7 +134,7 @@ void InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
         if (need_write_metadata)
             Poco::File(metadata_file_tmp_path).renameTo(metadata_file_path);
 
-        database->loadTables(context, thread_pool);
+        database->loadTables(context, thread_pool, has_force_restore_data_flag);
     }
     catch (...)
     {
@@ -507,7 +507,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         res = StorageFactory::instance().get(
             storage_name, data_path, table_name, database_name, context, context.getGlobalContext(), query_ptr, columns.columns,
-            columns.materialized_columns, columns.alias_columns, columns.column_defaults, create.attach);
+            columns.materialized_columns, columns.alias_columns, columns.column_defaults, create.attach, false);
 
     if (create.is_temporary)
         context.getSessionContext().addExternalTable(table_name, res);
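The two InterpreterCreateQuery hunks above encode a deliberate policy: createDatabase() forwards the member flag that loadMetadata() sets via setForceRestoreData(), while createTable() hard-codes false, so an interactive CREATE or ATTACH never skips the sanity checks. A self-contained sketch of that policy (the names are illustrative, not from the patch):

#include <iostream>

enum class CallSite { StartupLoading, InteractiveQuery };

/// Only the startup loading path may skip the safety thresholds.
static bool effectiveForceRestore(CallSite site, bool server_flag)
{
    return site == CallSite::StartupLoading && server_flag;
}

int main()
{
    std::cout << std::boolalpha
        << effectiveForceRestore(CallSite::StartupLoading, true) << '\n'    /// true
        << effectiveForceRestore(CallSite::InteractiveQuery, true) << '\n'; /// false
    return 0;
}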
diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp
index 7982bfc07f5..3bed3e92a82 100644
--- a/dbms/src/Interpreters/loadMetadata.cpp
+++ b/dbms/src/Interpreters/loadMetadata.cpp
@@ -28,7 +28,8 @@ static void executeCreateQuery(
     Context & context,
     const String & database,
     const String & file_name,
-    ThreadPool & pool)
+    ThreadPool & pool,
+    bool has_force_restore_data_flag)
 {
     ParserCreateQuery parser;
     ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "in file " + file_name);
@@ -39,6 +40,7 @@ static void executeCreateQuery(
 
     InterpreterCreateQuery interpreter(ast, context);
     interpreter.setDatabaseLoadingThreadpool(pool);
+    interpreter.setForceRestoreData(has_force_restore_data_flag);
     interpreter.execute();
 }
 
@@ -47,24 +49,32 @@ void loadMetadata(Context & context)
 {
     String path = context.getPath() + "metadata";
 
-    /// Используется для параллельной загрузки таблиц.
+    /** The 'force_restore_data' file may exist, which means that we should
+      * skip the safety threshold on the difference of data parts while initializing tables.
+      * This file is deleted after tables are loaded successfully (the flag is one-shot).
+      */
+    Poco::File force_restore_data_flag_file(context.getPath() + "flags/force_restore_data");
+    bool has_force_restore_data_flag = force_restore_data_flag_file.exists();
+
+    /// Used for parallel loading of tables.
     ThreadPool thread_pool(SettingMaxThreads().getAutoValue());
 
-    /// Цикл по базам данных
+    /// Loop over databases.
     Poco::DirectoryIterator dir_end;
     for (Poco::DirectoryIterator it(path); it != dir_end; ++it)
     {
         if (!it->isDirectory())
             continue;
 
-        /// Для директории .svn
+        /// Skip hidden entries such as the '.svn' directory and the '.gitignore' file.
         if (it.name().at(0) == '.')
             continue;
 
         String database = unescapeForFileName(it.name());
 
-        /// Для базы данных может быть расположен .sql файл, где описан запрос на её создание.
-        /// А если такого файла нет, то создаётся база данных с движком по-умолчанию.
+        /// A .sql file with the database creation statement may exist.
+        /// If it is absent, a database with the default engine is created.
         String database_attach_query;
         String database_metadata_file = it->path() + ".sql";
@@ -77,10 +87,13 @@ void loadMetadata(Context & context)
         else
             database_attach_query = "ATTACH DATABASE " + backQuoteIfNeed(database);
 
-        executeCreateQuery(database_attach_query, context, database, it->path(), thread_pool);
+        executeCreateQuery(database_attach_query, context, database, it->path(), thread_pool, has_force_restore_data_flag);
     }
 
     thread_pool.wait();
+
+    if (has_force_restore_data_flag)
+        force_restore_data_flag_file.remove();
 }
 
 }
diff --git a/dbms/src/Interpreters/tests/create_query.cpp b/dbms/src/Interpreters/tests/create_query.cpp
index 947b8150169..92ce941f589 100644
--- a/dbms/src/Interpreters/tests/create_query.cpp
+++ b/dbms/src/Interpreters/tests/create_query.cpp
@@ -83,7 +83,7 @@ int main(int argc, char ** argv)
         context.setPath("./");
 
         auto database = std::make_shared<DatabaseOrdinary>("test", "./metadata/test/");
         context.addDatabase("test", database);
-        database->loadTables(context, nullptr);
+        database->loadTables(context, nullptr, false);
         context.setCurrentDatabase("test");
 
         InterpreterCreateQuery interpreter(ast, context);
diff --git a/dbms/src/Interpreters/tests/select_query.cpp b/dbms/src/Interpreters/tests/select_query.cpp
index 649af332d46..db36109f07f 100644
--- a/dbms/src/Interpreters/tests/select_query.cpp
+++ b/dbms/src/Interpreters/tests/select_query.cpp
@@ -36,7 +36,7 @@ try
 
     DatabasePtr system = std::make_shared<DatabaseOrdinary>("system", "./metadata/system/");
     context.addDatabase("system", system);
-    system->loadTables(context, nullptr);
+    system->loadTables(context, nullptr, false);
     system->attachTable("one", StorageSystemOne::create("one"));
     system->attachTable("numbers", StorageSystemNumbers::create("numbers"));
     context.setCurrentDatabase("default");
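The flag file read by loadMetadata() above is the operator-facing interface: creating an empty file named force_restore_data in the server's flags directory enables the relaxed checks for exactly one startup, since the file is removed after tables load successfully. A self-contained sketch of the same one-shot pattern, using std::filesystem instead of the Poco::File API the patch itself uses (the relative path is illustrative):

#include <filesystem>
#include <iostream>

int main()
{
    const std::filesystem::path flag_file = "flags/force_restore_data";

    const bool has_force_restore_data_flag = std::filesystem::exists(flag_file);

    /// ... attach all databases here, forwarding the flag to every loadTables() call ...
    std::cout << "force_restore_data: " << std::boolalpha << has_force_restore_data_flag << '\n';

    /// Delete the flag only after loading succeeded; if loading throws, the file
    /// survives and the next startup retries with the checks still relaxed.
    if (has_force_restore_data_flag)
        std::filesystem::remove(flag_file);
    return 0;
}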
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 8da1156af61..9cafdb5184a 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -292,7 +292,7 @@ int Server::main(const std::vector<std::string> & args)
     loadMetadata(*global_context);
     LOG_DEBUG(log, "Loaded metadata.");
 
-    /// Создаём системные таблицы.
+    /// Create system tables.
     if (!global_context->isDatabaseExist("system"))
     {
         Poco::File(path + "data/system").createDirectories();
@@ -300,7 +300,9 @@ int Server::main(const std::vector<std::string> & args)
 
         auto system_database = std::make_shared<DatabaseOrdinary>("system", path + "metadata/system/");
         global_context->addDatabase("system", system_database);
-        system_database->loadTables(*global_context, nullptr);
+
+        /// 'has_force_restore_data_flag' is true so that loading does not fail if the query_log table is corrupted.
+        system_database->loadTables(*global_context, nullptr, true);
     }
 
     DatabasePtr system_database = global_context->getDatabase("system");
diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp
index f703b143dfb..8ab85afc4ff 100644
--- a/dbms/src/Storages/StorageFactory.cpp
+++ b/dbms/src/Storages/StorageFactory.cpp
@@ -218,7 +218,8 @@ StoragePtr StorageFactory::get(
     const NamesAndTypesList & materialized_columns,
     const NamesAndTypesList & alias_columns,
     const ColumnDefaults & column_defaults,
-    bool attach) const
+    bool attach,
+    bool has_force_restore_data_flag) const
 {
     if (name == "Log")
     {
@@ -739,6 +740,7 @@ For further info please read the documentation: https://clickhouse.yandex/
                 columns, materialized_columns, alias_columns, column_defaults,
                 context, primary_expr_list, date_column_name,
                 sampling_expression, index_granularity, merging_params,
+                has_force_restore_data_flag,
                 context.getMergeTreeSettings());
         else
             return StorageMergeTree::create(
@@ -746,6 +748,7 @@ For further info please read the documentation: https://clickhouse.yandex/
                 columns, materialized_columns, alias_columns, column_defaults,
                 context, primary_expr_list, date_column_name,
                 sampling_expression, index_granularity, merging_params,
+                has_force_restore_data_flag,
                 context.getMergeTreeSettings());
     }
     else
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index 6dd29063807..cea2778f995 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -36,6 +36,7 @@ StorageMergeTree::StorageMergeTree(
     const ASTPtr & sampling_expression_, /// nullptr, if sampling is not supported.
     size_t index_granularity_,
     const MergeTreeData::MergingParams & merging_params_,
+    bool has_force_restore_data_flag,
     const MergeTreeSettings & settings_)
     : IStorage{materialized_columns_, alias_columns_, column_defaults_},
     path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'),
@@ -49,7 +50,7 @@ StorageMergeTree::StorageMergeTree(
     increment(0),
     log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)"))
 {
-    data.loadDataParts(false);
+    data.loadDataParts(has_force_restore_data_flag);
     data.clearOldParts();
     data.clearOldTemporaryDirectories();
     increment.set(data.getMaxDataPartIndex());
@@ -86,13 +87,14 @@ StoragePtr StorageMergeTree::create(
     const ASTPtr & sampling_expression_,
     size_t index_granularity_,
     const MergeTreeData::MergingParams & merging_params_,
+    bool has_force_restore_data_flag_,
     const MergeTreeSettings & settings_)
 {
     auto res = new StorageMergeTree{
         path_, database_name_, table_name_,
         columns_, materialized_columns_, alias_columns_, column_defaults_,
         context_, primary_expr_ast_, date_column_name_,
-        sampling_expression_, index_granularity_, merging_params_, settings_
+        sampling_expression_, index_granularity_, merging_params_, has_force_restore_data_flag_, settings_
     };
     StoragePtr res_ptr = res->thisPtr();
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 4756143a5ee..a5909bd4243 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -192,6 +192,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
     const ASTPtr & sampling_expression_,
     size_t index_granularity_,
     const MergeTreeData::MergingParams & merging_params_,
+    bool has_force_restore_data_flag,
     const MergeTreeSettings & settings_)
     : IStorage{materialized_columns_, alias_columns_, column_defaults_}, context(context_),
     current_zookeeper(context.getZooKeeper()), database_name(database_name_),
@@ -224,6 +225,12 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
             LOG_WARNING(log, "Skipping the limits on severity of changes to data parts and columns (flag "
                 << replica_path << "/flags/force_restore_data).");
         }
+        else if (has_force_restore_data_flag)
+        {
+            skip_sanity_checks = true;
+
+            LOG_WARNING(log, "Skipping the limits on severity of changes to data parts and columns (flag force_restore_data).");
+        }
     }
     catch (const zkutil::KeeperException & e)
     {
@@ -333,6 +340,7 @@ StoragePtr StorageReplicatedMergeTree::create(
     const ASTPtr & sampling_expression_,
     size_t index_granularity_,
     const MergeTreeData::MergingParams & merging_params_,
+    bool has_force_restore_data_flag_,
    const MergeTreeSettings & settings_)
 {
     auto res = new StorageReplicatedMergeTree{
@@ -341,7 +349,7 @@ StoragePtr StorageReplicatedMergeTree::create(
         columns_, materialized_columns_, alias_columns_, column_defaults_,
         context_, primary_expr_ast_, date_column_name_,
         sampling_expression_, index_granularity_,
-        merging_params_, settings_};
+        merging_params_, has_force_restore_data_flag_, settings_};
 
     StoragePtr res_ptr = res->thisPtr();
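After this last hunk, a replicated table has two independent sources for skipping the sanity checks: the pre-existing per-replica ZooKeeper node <replica_path>/flags/force_restore_data and the new server-wide filesystem flag. Either one sets skip_sanity_checks; a condensed sketch of the combination (simplified from the constructor logic above, not a literal excerpt):

/// Either flag source alone is sufficient to relax the checks on data parts.
/// The ZooKeeper flag is scoped to a single replica, while the filesystem flag
/// read by loadMetadata() applies to every table on the server.
static bool shouldSkipSanityChecks(bool zookeeper_replica_flag, bool has_force_restore_data_flag)
{
    return zookeeper_replica_flag || has_force_restore_data_flag;
}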