Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-24 16:42:05 +00:00)

resolve table dependencies on metadata loading
commit b387f05d9c
parent b47f34aa17

@@ -3,6 +3,8 @@
#include <string>
#include <tuple>
#include <Common/SipHash.h>
#include <Common/quoteString.h>
#include <fmt/format.h>

namespace DB
{

@@ -47,5 +49,23 @@ template <> struct hash<DB::QualifiedTableName>
        return qualified_table.hash();
    }
};

}

namespace fmt
{
template <>
struct formatter<DB::QualifiedTableName>
{
    constexpr auto parse(format_parse_context & ctx)
    {
        return ctx.begin();
    }

    template <typename FormatContext>
    auto format(const DB::QualifiedTableName & name, FormatContext & ctx)
    {
        return format_to(ctx.out(), "{}.{}", DB::backQuoteIfNeed(name.database), DB::backQuoteIfNeed(name.table));
    }
};
}
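
The formatter specialization above is what lets a QualifiedTableName be passed directly to fmt-based logging (for example, the fmt::join calls in TablesLoader.cpp below). A minimal self-contained sketch of the same pattern, using a simplified stand-in struct rather than the real class (note: the real code's backQuoteIfNeed quotes an identifier only when necessary, while this sketch quotes unconditionally):

#include <fmt/format.h>
#include <string>

// Simplified stand-in for DB::QualifiedTableName.
struct QualifiedName { std::string database; std::string table; };

template <>
struct fmt::formatter<QualifiedName>
{
    constexpr auto parse(fmt::format_parse_context & ctx) { return ctx.begin(); }

    template <typename FormatContext>
    auto format(const QualifiedName & name, FormatContext & ctx) const
    {
        // The real code calls DB::backQuoteIfNeed, which quotes only when needed.
        return fmt::format_to(ctx.out(), "`{}`.`{}`", name.database, name.table);
    }
};

int main()
{
    fmt::print("{}\n", QualifiedName{"default", "hits"});  // prints `default`.`hits`
}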

src/Databases/DDLDependencyVisitor.cpp (new file, 84 lines)
@@ -0,0 +1,84 @@
#include <Databases/DDLDependencyVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>

namespace DB
{

void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data)
{
    if (const auto * function = ast->as<ASTFunction>())
        visit(*function, data);
}

bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & /*child*/)
{
    return !node->as<ASTStorage>();
}

void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data)
{
    if (function.name == "joinGet" ||
        function.name == "dictHas" ||
        function.name == "dictIsIn" ||
        function.name.starts_with("dictGet"))
    {
        extractTableNameFromArgument(function, data, 0);
    }
}

void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx)
{
    /// Just ignore incorrect arguments, proper exception will be thrown later
    if (!function.arguments || function.arguments->children.size() <= arg_idx)
        return;

    String database_name;
    String table_name;

    const auto * arg = function.arguments->as<ASTExpressionList>()->children[arg_idx].get();
    if (const auto * literal = arg->as<ASTLiteral>())
    {
        if (literal->value.getType() != Field::Types::String)
            return;

        String maybe_qualified_name = literal->value.get<String>();
        auto pos = maybe_qualified_name.find('.');
        if (pos == 0 || pos == (maybe_qualified_name.size() - 1))
        {
            /// Most likely name is invalid
            return;
        }
        else if (pos == std::string::npos)
        {
            table_name = std::move(maybe_qualified_name);
        }
        else
        {
            database_name = maybe_qualified_name.substr(0, pos);
            table_name = maybe_qualified_name.substr(pos + 1);
        }
    }
    else if (const auto * identifier = arg->as<ASTIdentifier>())
    {
        auto table_identifier = identifier->createTable();
        if (!table_identifier)
            return;

        database_name = table_identifier->getDatabaseName();
        table_name = table_identifier->shortName();
    }
    else
    {
        assert(false);
        return;
    }

    if (database_name.empty())
        database_name = data.default_database;
    data.dependencies.push_back(QualifiedTableName{std::move(database_name), std::move(table_name)});
}

}
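
The '.'-splitting rule in extractTableNameFromArgument is easy to misread: a leading or trailing dot rejects the name, no dot at all means the default database applies later, and anything else splits into database and table. A self-contained sketch of just that rule; splitQualifiedName is a hypothetical helper name, not part of the commit:

#include <optional>
#include <string>
#include <utility>

/// Hypothetical standalone helper mirroring the rule above:
///   "t"            -> {"", "t"}   (unqualified; caller fills in the default database)
///   "db.t"         -> {"db", "t"}
///   ".t" or "db."  -> nullopt     (most likely an invalid name)
std::optional<std::pair<std::string, std::string>> splitQualifiedName(std::string name)
{
    auto pos = name.find('.');
    if (pos == 0 || pos == name.size() - 1)
        return std::nullopt;
    if (pos == std::string::npos)
        return std::make_pair(std::string{}, std::move(name));
    return std::make_pair(name.substr(0, pos), name.substr(pos + 1));
}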

src/Databases/DDLDependencyVisitor.h (new file, 35 lines)
@@ -0,0 +1,35 @@
#pragma once
#include <Core/QualifiedTableName.h>
#include <Parsers/IAST_fwd.h>
#include <Interpreters/InDepthNodeVisitor.h>

namespace DB
{

class ASTFunction;

class DDLDependencyVisitor
{
public:
    struct Data
    {
        using TableDependencies = std::vector<QualifiedTableName>;
        String default_database;
        TableDependencies dependencies;
    };

    using Visitor = ConstInDepthNodeVisitor<DDLDependencyVisitor, true>;

    static void visit(const ASTPtr & ast, Data & data);
    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);

private:
    static void visit(const ASTFunction & function, Data & data);

    static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx);
};

using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor;

}
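
A usage sketch of the visitor, mirroring the call site added to DatabaseOrdinary::loadTablesMetadata later in this diff; ast (a parsed CREATE query) and default_database are assumed to exist in the caller:

/// Collect the tables a CREATE query depends on through its default expressions.
TableLoadingDependenciesVisitor::Data data;
data.default_database = default_database;
TableLoadingDependenciesVisitor visitor{data};
visitor.visit(ast);
/// data.dependencies now lists every table referenced via joinGet/dictGet-style functions.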

@@ -608,7 +608,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
    }

    /// Read and parse metadata in parallel
    ThreadPool pool;
    ThreadPool pool{1};
    for (const auto & file : metadata_files)
    {
        pool.scheduleOrThrowOnError([&]()

@@ -4,6 +4,8 @@
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabasesCommon.h>
#include <Databases/DDLDependencyVisitor.h>
#include <Databases/TablesLoader.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>

@@ -27,8 +29,6 @@ namespace fs = std::filesystem;

namespace DB
{
static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;

namespace

@@ -60,15 +60,6 @@ namespace
        throw;
    }
}

void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
{
    if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
    {
        LOG_INFO(log, "{}%", processed * 100.0 / total);
        watch.restart();
    }
}
}

@@ -90,14 +81,82 @@ void DatabaseOrdinary::loadStoredObjects(
     * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order,
     * which does not correspond to order tables creation and does not correspond to order of their location on disk.
     */
    using FileNames = std::map<std::string, ASTPtr>;
    std::mutex file_names_mutex;
    FileNames file_names;

    size_t total_dictionaries = 0;
    ParsedTablesMetadata metadata;
    loadTablesMetadata(local_context, metadata);

    auto process_metadata = [&file_names, &total_dictionaries, &file_names_mutex, this](
        const String & file_name)
    size_t total_tables = metadata.metadata.size() - metadata.total_dictionaries;

    AtomicStopwatch watch;
    std::atomic<size_t> dictionaries_processed{0};
    std::atomic<size_t> tables_processed{0};

    ThreadPool pool;

    /// We must attach dictionaries before attaching tables
    /// because while we're attaching tables we may need to have some dictionaries attached
    /// (for example, dictionaries can be used in the default expressions for some tables).
    /// On the other hand we can attach any dictionary (even sourced from ClickHouse table)
    /// without having any tables attached. It is so because attaching of a dictionary means
    /// loading of its config only, it doesn't involve loading the dictionary itself.

    /// Attach dictionaries.
    for (const auto & name_with_path_and_query : metadata.metadata)
    {
        const auto & name = name_with_path_and_query.first;
        const auto & path = name_with_path_and_query.second.first;
        const auto & ast = name_with_path_and_query.second.second;
        const auto & create_query = ast->as<const ASTCreateQuery &>();

        if (create_query.is_dictionary)
        {
            pool.scheduleOrThrowOnError([&]()
            {
                loadTableFromMetadata(local_context, path, name, ast, has_force_restore_data_flag);

                /// Messages, so that it's not boring to wait for the server to load for a long time.
                logAboutProgress(log, ++dictionaries_processed, metadata.total_dictionaries, watch);
            });
        }
    }

    pool.wait();

    /// Attach tables.
    for (const auto & name_with_path_and_query : metadata.metadata)
    {
        const auto & name = name_with_path_and_query.first;
        const auto & path = name_with_path_and_query.second.first;
        const auto & ast = name_with_path_and_query.second.second;
        const auto & create_query = ast->as<const ASTCreateQuery &>();

        if (!create_query.is_dictionary)
        {
            pool.scheduleOrThrowOnError([&]()
            {
                loadTableFromMetadata(local_context, path, name, ast, has_force_restore_data_flag);

                /// Messages, so that it's not boring to wait for the server to load for a long time.
                logAboutProgress(log, ++tables_processed, total_tables, watch);
            });
        }
    }

    pool.wait();

    if (!skip_startup_tables)
    {
        /// After all tables was basically initialized, startup them.
        startupTablesImpl(pool);
    }
}

void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata)
{
    size_t prev_tables_count = metadata.metadata.size();
    size_t prev_total_dictionaries = metadata.total_dictionaries;

    auto process_metadata = [&metadata, this](const String & file_name)
    {
        fs::path path(getMetadataPath());
        fs::path file_path(file_name);
@@ -122,9 +181,19 @@ void DatabaseOrdinary::loadStoredObjects(
            return;
        }

        std::lock_guard lock{file_names_mutex};
        file_names[file_name] = ast;
        total_dictionaries += create_query->is_dictionary;
        TableLoadingDependenciesVisitor::Data data;
        data.default_database = metadata.default_database;
        TableLoadingDependenciesVisitor visitor{data};
        visitor.visit(ast);
        QualifiedTableName qualified_name{database_name, create_query->table};

        std::lock_guard lock{metadata.mutex};
        metadata.metadata[qualified_name] = std::make_pair(full_path.string(), std::move(ast));
        if (data.dependencies.empty())
            metadata.independent_tables.insert(std::move(qualified_name));
        else
            metadata.table_dependencies.insert({std::move(qualified_name), std::move(data.dependencies)});
        metadata.total_dictionaries += create_query->is_dictionary;
    }
}
catch (Exception & e)
@@ -136,77 +205,28 @@ void DatabaseOrdinary::loadStoredObjects(

    iterateMetadataFiles(local_context, process_metadata);

    size_t total_tables = file_names.size() - total_dictionaries;
    size_t objects_in_database = metadata.metadata.size() - prev_tables_count;
    size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries;
    size_t tables_in_database = objects_in_database - dictionaries_in_database;

    LOG_INFO(log, "Total {} tables and {} dictionaries.", total_tables, total_dictionaries);
    LOG_INFO(log, "Total {} tables and {} dictionaries.", tables_in_database, dictionaries_in_database);
}

    AtomicStopwatch watch;
    std::atomic<size_t> tables_processed{0};
void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore)
{
    assert(name.database == database_name);
    const auto & create_query = ast->as<const ASTCreateQuery &>();

    ThreadPool pool;
    tryAttachTable(
        local_context,
        create_query,
        *this,
        name.database,
        file_path,
        force_restore);

    /// We must attach dictionaries before attaching tables
    /// because while we're attaching tables we may need to have some dictionaries attached
    /// (for example, dictionaries can be used in the default expressions for some tables).
    /// On the other hand we can attach any dictionary (even sourced from ClickHouse table)
    /// without having any tables attached. It is so because attaching of a dictionary means
    /// loading of its config only, it doesn't involve loading the dictionary itself.

    /// Attach dictionaries.
    for (const auto & name_with_query : file_names)
    {
        const auto & create_query = name_with_query.second->as<const ASTCreateQuery &>();

        if (create_query.is_dictionary)
        {
            pool.scheduleOrThrowOnError([&]()
            {
                tryAttachTable(
                    local_context,
                    create_query,
                    *this,
                    database_name,
                    getMetadataPath() + name_with_query.first,
                    has_force_restore_data_flag);

                /// Messages, so that it's not boring to wait for the server to load for a long time.
                logAboutProgress(log, ++tables_processed, total_tables, watch);
            });
        }
    }

    pool.wait();

    /// Attach tables.
    for (const auto & name_with_query : file_names)
    {
        const auto & create_query = name_with_query.second->as<const ASTCreateQuery &>();

        if (!create_query.is_dictionary)
        {
            pool.scheduleOrThrowOnError([&]()
            {
                tryAttachTable(
                    local_context,
                    create_query,
                    *this,
                    database_name,
                    getMetadataPath() + name_with_query.first,
                    has_force_restore_data_flag);

                /// Messages, so that it's not boring to wait for the server to load for a long time.
                logAboutProgress(log, ++tables_processed, total_tables, watch);
            });
        }
    }

    pool.wait();

    if (!skip_startup_tables)
    {
        /// After all tables was basically initialized, startup them.
        startupTablesImpl(pool);
    }
    /// Messages, so that it's not boring to wait for the server to load for a long time.
    //logAboutProgress(log, ++tables_processed, total_tables, watch);
}

void DatabaseOrdinary::startupTables()

@@ -22,6 +22,12 @@ public:

    void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override;

    bool supportsLoadingInTopologicalOrder() const override { return true; }

    void loadTablesMetadata(ContextPtr context, ParsedTablesMetadata & metadata) override;

    void loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) override;

    void startupTables() override;

    void alterTable(

@@ -7,6 +7,8 @@
#include <Common/Exception.h>
#include <Core/UUID.h>

#include <Core/QualifiedTableName.h> //FIXME

#include <ctime>
#include <functional>
#include <memory>

@@ -25,6 +27,7 @@ struct StorageInMemoryMetadata;
struct StorageID;
class ASTCreateQuery;
using DictionariesWithID = std::vector<std::pair<String, UUID>>;
struct ParsedTablesMetadata;

namespace ErrorCodes
{

@@ -131,6 +134,18 @@ public:
    {
    }

    virtual bool supportsLoadingInTopologicalOrder() const { return false; }

    virtual void loadTablesMetadata(ContextPtr /*local_context*/, ParsedTablesMetadata & /*metadata*/)
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented");
    }

    virtual void loadTableFromMetadata(ContextMutablePtr /*local_context*/, const String & /*file_path*/, const QualifiedTableName & /*name*/, const ASTPtr & /*ast*/, bool /*force_restore*/)
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented");
    }

    virtual void startupTables() {}

    /// Check the existence of the table.

src/Databases/TablesLoader.cpp (new file, 153 lines)
@@ -0,0 +1,153 @@
#include <Databases/TablesLoader.h>

namespace DB
{

static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;


void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
{
    if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
    {
        LOG_INFO(log, "{}%", processed * 100.0 / total);
        watch.restart();
    }
}

TablesLoader::TablesLoader(ContextMutablePtr global_context_, Databases databases_, bool force_restore_, bool force_attach_)
    : global_context(global_context_)
    , databases(std::move(databases_))
    , force_restore(force_restore_)
    , force_attach(force_attach_)
{
    all_tables.default_database = global_context->getCurrentDatabase();
    log = &Poco::Logger::get("TablesLoader");
}


void TablesLoader::loadTables()
{
    for (auto & database : databases)
    {
        if (database->supportsLoadingInTopologicalOrder())
            databases_to_load.emplace(database->getDatabaseName(), database);
        else
            database->loadStoredObjects(global_context, force_restore, force_attach, true);
    }

    for (auto & database : databases_to_load)
        database.second->loadTablesMetadata(global_context, all_tables);

    auto table_does_not_exist = [&](const QualifiedTableName & table_name, const QualifiedTableName & dependency_name)
    {
        if (all_tables.metadata.contains(dependency_name))
            return false;
        if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context))
            return false;
        /// FIXME if XML dict

        LOG_WARNING(log, "Table {} depends on {}, but seems like the second one does not exist", table_name, dependency_name);
        return true;
    };

    removeDependencies(table_does_not_exist, all_tables.independent_tables);

    //LOG_TRACE(log, "Independent database objects: {}", fmt::join(all_tables.independent_tables, ", "));
    //for (const auto & dependencies : all_tables.table_dependencies)
    //    LOG_TRACE(log, "Database object {} depends on: {}", dependencies.first, fmt::join(dependencies.second, ", "));

    auto is_dependency_loaded = [&](const QualifiedTableName & /*table_name*/, const QualifiedTableName & dependency_name)
    {
        return all_tables.independent_tables.contains(dependency_name);
    };

    AtomicStopwatch watch;
    ThreadPool pool;
    size_t level = 0;
    do
    {
        assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + all_tables.table_dependencies.size());
        startLoadingIndependentTables(pool, watch, level);
        std::unordered_set<QualifiedTableName> new_independent_tables;
        removeDependencies(is_dependency_loaded, new_independent_tables);
        pool.wait();
        all_tables.independent_tables = std::move(new_independent_tables);
        checkCyclicDependencies();
        ++level;
        assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + all_tables.table_dependencies.size());
    } while (!all_tables.independent_tables.empty());

    for (auto & database : databases_to_load)
    {
        database.second->startupTables();
    }
}

void TablesLoader::removeDependencies(RemoveDependencyPredicate need_remove_dependency, std::unordered_set<QualifiedTableName> & independent_tables)
{
    auto table_it = all_tables.table_dependencies.begin();
    while (table_it != all_tables.table_dependencies.end())
    {
        auto & dependencies = table_it->second;
        assert(!dependencies.empty());
        auto dependency_it = dependencies.begin();
        while (dependency_it != dependencies.end())
        {
            if (need_remove_dependency(table_it->first, *dependency_it))
                dependency_it = dependencies.erase(dependency_it);
            else
                ++dependency_it;
        }

        if (dependencies.empty())
        {
            independent_tables.emplace(std::move(table_it->first));
            table_it = all_tables.table_dependencies.erase(table_it);
        }
        else
        {
            ++table_it;
        }
    }
}

void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, AtomicStopwatch & watch, size_t level)
{
    size_t total_tables = all_tables.metadata.size();

    LOG_INFO(log, "Loading {} tables with {} dependency level", all_tables.independent_tables.size(), level);

    for (const auto & table_name : all_tables.independent_tables)
    {
        pool.scheduleOrThrowOnError([&]()
        {
            const auto & path_and_query = all_tables.metadata[table_name];
            const auto & path = path_and_query.first;
            const auto & ast = path_and_query.second;
            databases_to_load[table_name.database]->loadTableFromMetadata(global_context, path, table_name, ast, force_restore);
            logAboutProgress(log, ++tables_processed, total_tables, watch);
        });
    }
}

void TablesLoader::checkCyclicDependencies() const
{
    if (!all_tables.independent_tables.empty())
        return;
    if (all_tables.table_dependencies.empty())
        return;

    for (const auto & dependencies : all_tables.table_dependencies)
    {
        LOG_WARNING(log, "Cannot resolve dependencies: Table {} depends on {}",
            dependencies.first, fmt::join(dependencies.second, ", "));
    }

    throw Exception(ErrorCodes::INFINITE_LOOP, "Cannot attach {} tables due to cyclic dependencies. "
        "See server log for details.", all_tables.table_dependencies.size());
}

}
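
The do/while loop in loadTables is a level-by-level (Kahn-style) topological load: load everything that has no unresolved dependencies, drop the just-loaded names from the remaining dependency lists, repeat, and treat a pass that loads nothing while dependencies remain as a cycle. A self-contained C++20 sketch of just that scheduling logic, with plain strings in place of QualifiedTableName, a printf in place of the real load, and hypothetical sample data:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

int main()
{
    // table -> tables it depends on (hypothetical data; tables with no
    // dependencies are seeded into `independent`, like independent_tables).
    std::unordered_map<std::string, std::vector<std::string>> deps = {
        {"t",     {"join", "dict1"}},
        {"dict2", {"join"}},
        {"join",  {"dict1"}},
        {"dict1", {"dict_src"}},
    };
    std::unordered_set<std::string> independent = {"dict_src"};

    size_t level = 0;
    while (!independent.empty())
    {
        // "Load" the current level (TablesLoader schedules this on a ThreadPool).
        for (const auto & name : independent)
            std::printf("level %zu: load %s\n", level, name.c_str());

        // Drop satisfied dependencies; tables left with none form the next level.
        std::unordered_set<std::string> next;
        for (auto it = deps.begin(); it != deps.end();)
        {
            auto & d = it->second;
            std::erase_if(d, [&](const std::string & dep) { return independent.contains(dep); });
            if (d.empty())
            {
                next.insert(it->first);
                it = deps.erase(it);
            }
            else
                ++it;
        }
        independent = std::move(next);
        ++level;
    }

    if (!deps.empty())
    {
        // Analogous to the INFINITE_LOOP exception in checkCyclicDependencies.
        std::fprintf(stderr, "cyclic dependencies among %zu tables\n", deps.size());
        return 1;
    }
}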

src/Databases/TablesLoader.h (new file, 63 lines)
@@ -0,0 +1,63 @@
#pragma once
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Databases/IDatabase.h>
#include <Core/QualifiedTableName.h>
#include <common/logger_useful.h>
#include <Common/Stopwatch.h>
#include <Common/ThreadPool.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int INFINITE_LOOP;
}

void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch);

struct ParsedTablesMetadata
{
    String default_database;

    using ParsedMetadata = std::map<QualifiedTableName, std::pair<String, ASTPtr>>;
    std::mutex mutex;
    ParsedMetadata metadata;
    size_t total_dictionaries = 0;
    std::unordered_set<QualifiedTableName> independent_tables;
    std::unordered_map<QualifiedTableName, std::vector<QualifiedTableName>> table_dependencies;
};

class TablesLoader
{
public:

    using Databases = std::vector<DatabasePtr>;

    TablesLoader(ContextMutablePtr global_context_, Databases databases_, bool force_restore_ = false, bool force_attach_ = false);

    void loadTables();

private:
    ContextMutablePtr global_context;
    Databases databases;
    bool force_restore;
    bool force_attach;

    std::map<String, DatabasePtr> databases_to_load;
    ParsedTablesMetadata all_tables;
    Poco::Logger * log;
    std::atomic<size_t> tables_processed{0};


    using RemoveDependencyPredicate = std::function<bool(const QualifiedTableName &, const QualifiedTableName &)>;
    void removeDependencies(RemoveDependencyPredicate need_remove_dependency, std::unordered_set<QualifiedTableName> & independent_tables);

    void startLoadingIndependentTables(ThreadPool & pool, AtomicStopwatch & watch, size_t level);

    void checkCyclicDependencies() const;

};

}
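
End to end, the loader is driven as in the loadMetadata.cpp change further below; a sketch, assuming context and a hypothetical database_names list come from the caller:

/// Collect the databases, then let TablesLoader resolve dependencies and attach everything.
TablesLoader::Databases loaded_databases;
for (const auto & name : database_names)  // hypothetical list of database names
    loaded_databases.emplace_back(DatabaseCatalog::instance().getDatabase(name));

TablesLoader loader{context, std::move(loaded_databases), /* force_restore */ false, /* force_attach */ true};
loader.loadTables();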

@@ -157,15 +157,6 @@ void DatabaseCatalog::loadDatabases()

    /// Another background thread which drops temporary LiveViews.
    /// We should start it after loadMarkedAsDroppedTables() to avoid race condition.
    TemporaryLiveViewCleaner::instance().startup();

    /// Start up tables after all databases are loaded.
    for (const auto & [database_name, database] : databases)
    {
        if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
            continue;

        database->startupTables();
    }
}

void DatabaseCatalog::shutdownImpl()

@@ -53,6 +53,7 @@
#include <Databases/DatabaseReplicated.h>
#include <Databases/IDatabase.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/TablesLoader.h>

#include <Compression/CompressionFactory.h>

@@ -271,9 +272,12 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
        renamed = true;
    }

    /// We use global context here, because storages lifetime is bigger than query context lifetime
    database->loadStoredObjects(
        getContext()->getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach, skip_startup_tables); //-V560
    if (!load_database_without_tables)
    {
        /// We use global context here, because storages lifetime is bigger than query context lifetime
        TablesLoader loader{getContext()->getGlobalContext(), {database}, has_force_restore_data_flag, create.attach && force_attach};
        loader.loadTables();
    }
}
catch (...)
{

@@ -57,6 +57,11 @@ public:
        skip_startup_tables = skip_startup_tables_;
    }

    void setLoadDatabaseWithoutTables(bool load_database_without_tables_)
    {
        load_database_without_tables = load_database_without_tables_;
    }

    /// Obtain information about columns, their types, default values and column comments,
    /// for case when columns in CREATE query is specified explicitly.
    static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, ContextPtr context, bool attach);

@@ -100,6 +105,7 @@ private:
    bool internal = false;
    bool force_attach = false;
    bool skip_startup_tables = false;
    bool load_database_without_tables = false;

    mutable String as_database_saved;
    mutable String as_table_saved;

@@ -11,6 +11,7 @@
#include <Interpreters/loadMetadata.h>

#include <Databases/DatabaseOrdinary.h>
#include <Databases/TablesLoader.h>

#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>

@@ -44,6 +45,7 @@ static void executeCreateQuery(
    interpreter.setForceAttach(true);
    interpreter.setForceRestoreData(has_force_restore_data_flag);
    interpreter.setSkipStartupTables(true);
    interpreter.setLoadDatabaseWithoutTables(database != DatabaseCatalog::SYSTEM_DATABASE);
    interpreter.execute();
}

@@ -155,8 +157,15 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam
    if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists)
        databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name));

    TablesLoader::Databases loaded_databases;
    for (const auto & [name, db_path] : databases)
    {
        loadDatabase(context, name, db_path, has_force_restore_data_flag);
        loaded_databases.emplace_back(DatabaseCatalog::instance().getDatabase(name));
    }

    TablesLoader loader{context, std::move(loaded_databases), has_force_restore_data_flag, /* force_attach */ true};
    loader.loadTables();

    if (has_force_restore_data_flag)
    {

@@ -0,0 +1,5 @@
dict1
dict2
dict_src
join
t

tests/queries/0_stateless/01160_table_dependencies.sh (new executable file, 43 lines)
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh


$CLICKHOUSE_CLIENT -q "drop table if exists dict_src;"
$CLICKHOUSE_CLIENT -q "drop dictionary if exists dict1;"
$CLICKHOUSE_CLIENT -q "drop dictionary if exists dict2;"
$CLICKHOUSE_CLIENT -q "drop table if exists join;"
$CLICKHOUSE_CLIENT -q "drop table if exists t;"

$CLICKHOUSE_CLIENT -q "create table dict_src (n int, m int, s String) engine=MergeTree order by n;"

$CLICKHOUSE_CLIENT -q "create dictionary dict1 (n int default 0, m int default 1, s String default 'qqq')
PRIMARY KEY n
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_src' PASSWORD '' DB '$CLICKHOUSE_DATABASE'))
LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());"

$CLICKHOUSE_CLIENT -q "create table join(n int, m int default dictGet('$CLICKHOUSE_DATABASE.dict1', 'm', 42::UInt64)) engine=Join(any, left, n);"

$CLICKHOUSE_CLIENT -q "create dictionary dict2 (n int default 0, m int DEFAULT 2, s String default 'asd')
PRIMARY KEY n
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'join' PASSWORD '' DB '$CLICKHOUSE_DATABASE'))
LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());"

$CLICKHOUSE_CLIENT -q "create table t (n int, m int default joinGet($CLICKHOUSE_DATABASE.join, 'm', 42::int),
s String default dictGet($CLICKHOUSE_DATABASE.dict1, 's', 42::UInt64)) engine=MergeTree order by n;"

CLICKHOUSE_CLIENT_DEFAULT_DB=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--database=${CLICKHOUSE_DATABASE}"'/--database=default/g')

for i in {1..10}; do
    $CLICKHOUSE_CLIENT_DEFAULT_DB -q "detach database $CLICKHOUSE_DATABASE;"
    $CLICKHOUSE_CLIENT_DEFAULT_DB -q "attach database $CLICKHOUSE_DATABASE;"
done
$CLICKHOUSE_CLIENT -q "show tables from $CLICKHOUSE_DATABASE;"

$CLICKHOUSE_CLIENT -q "drop table dict_src;"
$CLICKHOUSE_CLIENT -q "drop dictionary dict1;"
$CLICKHOUSE_CLIENT -q "drop dictionary dict2;"
$CLICKHOUSE_CLIENT -q "drop table join;"
$CLICKHOUSE_CLIENT -q "drop table t;"