Add system.distribution_queue

system.distribution_queue contains the following columns:
- database
- table
- data_path
- is_blocked
- error_count
- data_files
- data_compressed_bytes
This commit is contained in:
Azat Khuzhin 2020-06-03 02:47:32 +03:00
parent 60d10f1bac
commit 389f78ceee
10 changed files with 232 additions and 11 deletions

View File

@ -108,11 +108,19 @@ StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor()
void StorageDistributedDirectoryMonitor::flushAllData()
{
if (!quit)
{
if (quit)
return;
CurrentMetrics::Increment metric_pending_files{CurrentMetrics::DistributedFilesToInsert, 0};
std::unique_lock lock{mutex};
processFiles(metric_pending_files);
const auto & files = getFiles(metric_pending_files);
if (!files.empty())
{
processFiles(files, metric_pending_files);
/// Update counters
getFiles(metric_pending_files);
}
}
@ -139,11 +147,16 @@ void StorageDistributedDirectoryMonitor::run()
while (!quit)
{
do_sleep = true;
const auto & files = getFiles(metric_pending_files);
if (files.empty())
break;
if (!monitor_blocker.isCancelled())
{
try
{
do_sleep = !processFiles(metric_pending_files);
do_sleep = !processFiles(files, metric_pending_files);
}
catch (...)
{
@ -171,6 +184,9 @@ void StorageDistributedDirectoryMonitor::run()
break;
}
/// Update counters
getFiles(metric_pending_files);
if (!quit && do_sleep)
task_handle->scheduleAfter(sleep_time.count());
}
@ -226,9 +242,10 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
}
bool StorageDistributedDirectoryMonitor::processFiles(CurrentMetrics::Increment & metric_pending_files)
std::map<UInt64, std::string> StorageDistributedDirectoryMonitor::getFiles(CurrentMetrics::Increment & metric_pending_files)
{
std::map<UInt64, std::string> files;
size_t new_bytes_count = 0;
Poco::DirectoryIterator end;
for (Poco::DirectoryIterator it{path}; it != end; ++it)
@ -237,16 +254,23 @@ bool StorageDistributedDirectoryMonitor::processFiles(CurrentMetrics::Increment
Poco::Path file_path{file_path_str};
if (!it->isDirectory() && startsWith(file_path.getExtension(), "bin"))
{
files[parse<UInt64>(file_path.getBaseName())] = file_path_str;
new_bytes_count += Poco::File(file_path).getSize();
}
}
files_count = files.size();
bytes_count = new_bytes_count;
/// Note: the value of this metric is kept if this function throws an exception.
/// This is needed because, in case of an exception, the files are still pending.
metric_pending_files.changeTo(files.size());
if (files.empty())
return false;
return files;
}
bool StorageDistributedDirectoryMonitor::processFiles(const std::map<UInt64, std::string> & files, CurrentMetrics::Increment & metric_pending_files)
{
if (should_batch_inserts)
{
processFilesWithBatching(files, metric_pending_files);

View File

@ -37,9 +37,20 @@ public:
/// For scheduling via DistributedBlockOutputStream
bool scheduleAfter(size_t ms);
/// system.distribution_queue interface
/// Returns a copy of the `path` member.
std::string getPath() const { return path; }
/// Racy but ok
/// (the getters below return their members directly, without taking any lock).
/// Returns the current value of `error_count`.
size_t getErrorCount() const { return error_count; }
/// Returns `files_count`, which getFiles() sets to the number of files it has found.
size_t getFilesCount() const { return files_count; }
/// Returns `bytes_count`, which getFiles() sets to the total size of the files it has found.
size_t getBytesCount() const { return bytes_count; }
/// Returns the result of monitor_blocker.isCancelled(), converted to size_t.
/// NOTE(review): this looks like a boolean flag - confirm whether `bool` was the intended return type.
size_t isBlocked() const { return monitor_blocker.isCancelled(); }
private:
void run();
bool processFiles(CurrentMetrics::Increment & metric_pending_files);
std::map<UInt64, std::string> getFiles(CurrentMetrics::Increment & metric_pending_files);
bool processFiles(const std::map<UInt64, std::string> & files, CurrentMetrics::Increment & metric_pending_files);
void processFile(const std::string & file_path, CurrentMetrics::Increment & metric_pending_files);
void processFilesWithBatching(const std::map<UInt64, std::string> & files, CurrentMetrics::Increment & metric_pending_files);
@ -61,7 +72,10 @@ private:
struct BatchHeader;
struct Batch;
size_t error_count{};
size_t error_count = 0;
size_t files_count = 0;
size_t bytes_count = 0;
const std::chrono::milliseconds default_sleep_time;
std::chrono::milliseconds sleep_time;
const std::chrono::milliseconds max_sleep_time;

View File

@ -656,6 +656,17 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(
return *node_data.directory_monitor;
}
std::vector<StorageDistributedDirectoryMonitor *> StorageDistributed::getAllDirectoryMonitors()
{
std::vector<StorageDistributedDirectoryMonitor *> monitors;
{
std::lock_guard lock(cluster_nodes_mutex);
for (auto & node : cluster_nodes_data)
monitors.push_back(node.second.directory_monitor.get());
}
return monitors;
}
size_t StorageDistributed::getShardCount() const
{
return getCluster()->getShardCount();

View File

@ -94,6 +94,7 @@ public:
void shutdown() override;
Strings getDataPaths() const override;
size_t getInsertQueueSize() const { return 0; }
const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; }
const String & getShardingKeyColumnName() const { return sharding_key_column_name; }
@ -107,6 +108,8 @@ public:
void createDirectoryMonitors(const std::string & disk);
/// Ensure that the directory monitor thread and the connection pool are created for the given disk and subdirectory name
StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & disk, const std::string & name);
/// Return the list of all monitors; it is filled lazily (there are no monitors until at least one INSERT has been executed)
std::vector<StorageDistributedDirectoryMonitor *> getAllDirectoryMonitors();
void flushClusterNodesAllData();

View File

@ -0,0 +1,110 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Storages/System/StorageSystemDistributionQueue.h>
#include <Storages/Distributed/DirectoryMonitor.h>
#include <Storages/StorageDistributed.h>
#include <Storages/VirtualColumnUtils.h>
#include <Access/ContextAccess.h>
#include <Common/typeid_cast.h>
#include <Databases/IDatabase.h>
namespace DB
{
/// Describes the columns of system.distribution_queue.
/// The order here is the order in which fillData() appends values to res_columns.
NamesAndTypesList StorageSystemDistributionQueue::getNamesAndTypes()
{
    NamesAndTypesList columns;

    /// Which table and which queue directory the row is about.
    columns.emplace_back("database", std::make_shared<DataTypeString>());
    columns.emplace_back("table", std::make_shared<DataTypeString>());
    columns.emplace_back("data_path", std::make_shared<DataTypeString>());

    /// State and counters reported by the directory monitor.
    columns.emplace_back("is_blocked", std::make_shared<DataTypeUInt8>());
    columns.emplace_back("error_count", std::make_shared<DataTypeUInt64>());
    columns.emplace_back("data_files", std::make_shared<DataTypeUInt64>());
    columns.emplace_back("data_compressed_bytes", std::make_shared<DataTypeUInt64>());

    return columns;
}
/// Appends one row per directory monitor of every Distributed table that
///  - the current user is allowed to see (SHOW_TABLES), and
///  - passes the conditions of the query on the `database` / `table` columns.
void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const
{
    const auto access = context.getAccess();
    const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES);

    /// database name -> table name -> storage; only Distributed tables visible to the user get here.
    std::map<String, std::map<String, StoragePtr>> distributed_tables;

    for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
    {
        /// Lazy database can not contain distributed tables
        if (database->getEngineName() == "Lazy")
            continue;

        const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name);

        for (auto it = database->getTablesIterator(context); it->isValid(); it->next())
        {
            const bool is_distributed = dynamic_cast<const StorageDistributed *>(it->table().get()) != nullptr;
            if (!is_distributed)
                continue;

            if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, it->name()))
                continue;

            distributed_tables[database_name][it->name()] = it->table();
        }
    }

    /// Two parallel columns with the (database, table) pairs collected above.
    MutableColumnPtr database_names_mut = ColumnString::create();
    MutableColumnPtr table_names_mut = ColumnString::create();

    for (const auto & database_entry : distributed_tables)
    {
        for (const auto & table_entry : database_entry.second)
        {
            database_names_mut->insert(database_entry.first);
            table_names_mut->insert(table_entry.first);
        }
    }

    ColumnPtr database_names = std::move(database_names_mut);
    ColumnPtr table_names = std::move(table_names_mut);

    /// Determine what tables are needed by the conditions in the query.
    {
        Block candidates
        {
            { database_names, std::make_shared<DataTypeString>(), "database" },
            { table_names, std::make_shared<DataTypeString>(), "table" },
        };

        VirtualColumnUtils::filterBlockWithQuery(query_info.query, candidates, context);

        /// Nothing is left after filtering - nothing to output.
        if (!candidates.rows())
            return;

        database_names = candidates.getByName("database").column;
        table_names = candidates.getByName("table").column;
    }

    const size_t selected_tables = database_names->size();
    for (size_t row = 0; row < selected_tables; ++row)
    {
        /// Copies are intentional: operator[] returns a temporary Field.
        String database = (*database_names)[row].safeGet<const String &>();
        String table = (*table_names)[row].safeGet<const String &>();

        auto & storage = dynamic_cast<StorageDistributed &>(*distributed_tables[database][table]);

        for (auto * monitor : storage.getAllDirectoryMonitors())
        {
            /// Same order as in getNamesAndTypes().
            size_t column_index = 0;
            res_columns[column_index++]->insert(database);
            res_columns[column_index++]->insert(table);
            res_columns[column_index++]->insert(monitor->getPath());
            res_columns[column_index++]->insert(monitor->isBlocked());
            res_columns[column_index++]->insert(monitor->getErrorCount());
            res_columns[column_index++]->insert(monitor->getFilesCount());
            res_columns[column_index++]->insert(monitor->getBytesCount());
        }
    }
}
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <ext/shared_ptr_helper.h>
#include <Storages/System/IStorageSystemOneBlock.h>
namespace DB
{
class Context;
/** The `distribution_queue` system table.
  * Allows you to view the INSERT queues of the Distributed tables.
  */
class StorageSystemDistributionQueue final
    : public ext::shared_ptr_helper<StorageSystemDistributionQueue>
    , public IStorageSystemOneBlock<StorageSystemDistributionQueue>
{
    friend struct ext::shared_ptr_helper<StorageSystemDistributionQueue>;

public:
    /// Names and types of the columns of the table.
    static NamesAndTypesList getNamesAndTypes();

    std::string getName() const override { return "SystemDistributionQueue"; }

protected:
    using IStorageSystemOneBlock::IStorageSystemOneBlock;

    /// Appends the rows of the table to res_columns.
    void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override;
};
}

View File

@ -28,6 +28,7 @@
#include <Storages/System/StorageSystemProcesses.h>
#include <Storages/System/StorageSystemReplicas.h>
#include <Storages/System/StorageSystemReplicationQueue.h>
#include <Storages/System/StorageSystemDistributionQueue.h>
#include <Storages/System/StorageSystemSettings.h>
#include <Storages/System/StorageSystemMergeTreeSettings.h>
#include <Storages/System/StorageSystemTableEngines.h>
@ -124,6 +125,7 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper)
system_database.attachTable("mutations", StorageSystemMutations::create("mutations"));
system_database.attachTable("replicas", StorageSystemReplicas::create("replicas"));
system_database.attachTable("replication_queue", StorageSystemReplicationQueue::create("replication_queue"));
system_database.attachTable("distribution_queue", StorageSystemDistributionQueue::create("distribution_queue"));
system_database.attachTable("dictionaries", StorageSystemDictionaries::create("dictionaries"));
system_database.attachTable("models", StorageSystemModels::create("models"));
system_database.attachTable("clusters", StorageSystemClusters::create("clusters"));

View File

@ -121,6 +121,7 @@ SRCS(
System/StorageSystemQuotasUsage.cpp
System/StorageSystemReplicas.cpp
System/StorageSystemReplicationQueue.cpp
System/StorageSystemDistributionQueue.cpp
System/StorageSystemRoleGrants.cpp
System/StorageSystemRoles.cpp
System/StorageSystemRowPolicies.cpp

View File

@ -0,0 +1,6 @@
INSERT
1 0 1 1
FLUSH
1 0 0 0
UNBLOCK
0 0 0 0

View File

@ -0,0 +1,21 @@
-- Start from a clean state.
DROP TABLE IF EXISTS null_01293;
DROP TABLE IF EXISTS dist_01293;

CREATE TABLE null_01293 (key Int) ENGINE = Null();
CREATE TABLE dist_01293 AS null_01293 ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), null_01293, key);

-- no rows, since no active monitor
SELECT * FROM system.distribution_queue;

-- Insert while sends are stopped, then look at the queue.
SELECT 'INSERT';
SYSTEM STOP DISTRIBUTED SENDS dist_01293;
INSERT INTO dist_01293 SELECT * FROM numbers(10);
SELECT is_blocked, error_count, data_files, data_compressed_bytes>100 FROM system.distribution_queue;

-- Look at the queue again after an explicit flush.
SYSTEM FLUSH DISTRIBUTED dist_01293;
SELECT 'FLUSH';
SELECT is_blocked, error_count, data_files, data_compressed_bytes FROM system.distribution_queue;

-- And once more after sends are started again.
SELECT 'UNBLOCK';
SYSTEM START DISTRIBUTED SENDS dist_01293;
SELECT is_blocked, error_count, data_files, data_compressed_bytes FROM system.distribution_queue;