mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-14 19:45:11 +00:00
Refactoring distributed header parsing
This commit is contained in:
parent
676bc83c6d
commit
fce8b6b5ef
@ -78,6 +78,70 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct DistributedHeader
|
||||||
|
{
|
||||||
|
Settings insert_settings;
|
||||||
|
std::string insert_query;
|
||||||
|
ClientInfo client_info;
|
||||||
|
};
|
||||||
|
|
||||||
|
static DistributedHeader readDistributedHeader(ReadBuffer & in, Poco::Logger * log)
|
||||||
|
{
|
||||||
|
DistributedHeader header;
|
||||||
|
|
||||||
|
UInt64 query_size;
|
||||||
|
readVarUInt(query_size, in);
|
||||||
|
|
||||||
|
if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER)
|
||||||
|
{
|
||||||
|
/// Read the header as a string.
|
||||||
|
String header_data;
|
||||||
|
readStringBinary(header_data, in);
|
||||||
|
|
||||||
|
/// Check the checksum of the header.
|
||||||
|
CityHash_v1_0_2::uint128 checksum;
|
||||||
|
readPODBinary(checksum, in);
|
||||||
|
assertChecksum(checksum, CityHash_v1_0_2::CityHash128(header_data.data(), header_data.size()));
|
||||||
|
|
||||||
|
/// Read the parts of the header.
|
||||||
|
ReadBufferFromString header_buf(header_data);
|
||||||
|
|
||||||
|
UInt64 initiator_revision;
|
||||||
|
readVarUInt(initiator_revision, header_buf);
|
||||||
|
if (DBMS_TCP_PROTOCOL_VERSION < initiator_revision)
|
||||||
|
{
|
||||||
|
LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. It may lack support for new features.");
|
||||||
|
}
|
||||||
|
|
||||||
|
readStringBinary(header.insert_query, header_buf);
|
||||||
|
header.insert_settings.read(header_buf);
|
||||||
|
|
||||||
|
if (header_buf.hasPendingData())
|
||||||
|
header.client_info.read(header_buf, initiator_revision);
|
||||||
|
|
||||||
|
/// Add handling new data here, for example:
|
||||||
|
///
|
||||||
|
/// if (header_buf.hasPendingData())
|
||||||
|
/// readVarUInt(my_new_data, header_buf);
|
||||||
|
///
|
||||||
|
/// And note that it is safe, because we have checksum and size for header.
|
||||||
|
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT)
|
||||||
|
{
|
||||||
|
header.insert_settings.read(in, SettingsWriteFormat::BINARY);
|
||||||
|
readStringBinary(header.insert_query, in);
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
header.insert_query.resize(query_size);
|
||||||
|
in.readStrict(header.insert_query.data(), query_size);
|
||||||
|
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -330,13 +394,10 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
Block sample_block;
|
Block sample_block;
|
||||||
Settings insert_settings;
|
|
||||||
String insert_query;
|
|
||||||
ClientInfo client_info;
|
|
||||||
|
|
||||||
/// Determine metadata of the current file and check if it is not broken.
|
/// Determine metadata of the current file and check if it is not broken.
|
||||||
ReadBufferFromFile in{file_path};
|
ReadBufferFromFile in{file_path};
|
||||||
readHeader(in, insert_settings, insert_query, client_info, log);
|
readDistributedHeader(in, log);
|
||||||
|
|
||||||
CompressedReadBuffer decompressing_in(in);
|
CompressedReadBuffer decompressing_in(in);
|
||||||
NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
|
NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
|
||||||
@ -359,16 +420,11 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
|||||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
|
CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
|
||||||
|
|
||||||
ReadBufferFromFile in{file_path};
|
ReadBufferFromFile in{file_path};
|
||||||
|
const auto & header = readDistributedHeader(in, log);
|
||||||
|
|
||||||
Settings insert_settings;
|
auto connection = pool->get(timeouts, &header.insert_settings);
|
||||||
std::string insert_query;
|
RemoteBlockOutputStream remote{*connection, timeouts,
|
||||||
ClientInfo client_info;
|
header.insert_query, header.insert_settings, header.client_info};
|
||||||
|
|
||||||
readHeader(in, insert_settings, insert_query, client_info, log);
|
|
||||||
|
|
||||||
auto connection = pool->get(timeouts, &insert_settings);
|
|
||||||
|
|
||||||
RemoteBlockOutputStream remote{*connection, timeouts, insert_query, insert_settings, client_info};
|
|
||||||
|
|
||||||
remote.writePrefix();
|
remote.writePrefix();
|
||||||
remote.writePrepared(in);
|
remote.writePrepared(in);
|
||||||
@ -390,57 +446,6 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
|||||||
LOG_TRACE(log, "Finished processing `{}`", file_path);
|
LOG_TRACE(log, "Finished processing `{}`", file_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
void StorageDistributedDirectoryMonitor::readHeader(
|
|
||||||
ReadBuffer & in, Settings & insert_settings, std::string & insert_query, ClientInfo & client_info, Poco::Logger * log)
|
|
||||||
{
|
|
||||||
UInt64 query_size;
|
|
||||||
readVarUInt(query_size, in);
|
|
||||||
|
|
||||||
if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER)
|
|
||||||
{
|
|
||||||
/// Read the header as a string.
|
|
||||||
String header;
|
|
||||||
readStringBinary(header, in);
|
|
||||||
|
|
||||||
/// Check the checksum of the header.
|
|
||||||
CityHash_v1_0_2::uint128 checksum;
|
|
||||||
readPODBinary(checksum, in);
|
|
||||||
assertChecksum(checksum, CityHash_v1_0_2::CityHash128(header.data(), header.size()));
|
|
||||||
|
|
||||||
/// Read the parts of the header.
|
|
||||||
ReadBufferFromString header_buf(header);
|
|
||||||
|
|
||||||
UInt64 initiator_revision;
|
|
||||||
readVarUInt(initiator_revision, header_buf);
|
|
||||||
if (DBMS_TCP_PROTOCOL_VERSION < initiator_revision)
|
|
||||||
{
|
|
||||||
LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. It may lack support for new features.");
|
|
||||||
}
|
|
||||||
|
|
||||||
readStringBinary(insert_query, header_buf);
|
|
||||||
insert_settings.read(header_buf);
|
|
||||||
|
|
||||||
if (header_buf.hasPendingData())
|
|
||||||
client_info.read(header_buf, initiator_revision);
|
|
||||||
|
|
||||||
/// Add handling new data here, for example:
|
|
||||||
/// if (header_buf.hasPendingData())
|
|
||||||
/// readVarUInt(my_new_data, header_buf);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT)
|
|
||||||
{
|
|
||||||
insert_settings.read(in, SettingsWriteFormat::BINARY);
|
|
||||||
readStringBinary(insert_query, in);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
insert_query.resize(query_size);
|
|
||||||
in.readStrict(insert_query.data(), query_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct StorageDistributedDirectoryMonitor::BatchHeader
|
struct StorageDistributedDirectoryMonitor::BatchHeader
|
||||||
{
|
{
|
||||||
Settings settings;
|
Settings settings;
|
||||||
@ -551,9 +556,6 @@ struct StorageDistributedDirectoryMonitor::Batch
|
|||||||
bool batch_broken = false;
|
bool batch_broken = false;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
Settings insert_settings;
|
|
||||||
String insert_query;
|
|
||||||
ClientInfo client_info;
|
|
||||||
std::unique_ptr<RemoteBlockOutputStream> remote;
|
std::unique_ptr<RemoteBlockOutputStream> remote;
|
||||||
bool first = true;
|
bool first = true;
|
||||||
|
|
||||||
@ -568,12 +570,13 @@ struct StorageDistributedDirectoryMonitor::Batch
|
|||||||
}
|
}
|
||||||
|
|
||||||
ReadBufferFromFile in(file_path->second);
|
ReadBufferFromFile in(file_path->second);
|
||||||
parent.readHeader(in, insert_settings, insert_query, client_info, parent.log);
|
const auto & header = readDistributedHeader(in, parent.log);
|
||||||
|
|
||||||
if (first)
|
if (first)
|
||||||
{
|
{
|
||||||
first = false;
|
first = false;
|
||||||
remote = std::make_unique<RemoteBlockOutputStream>(*connection, timeouts, insert_query, insert_settings, client_info);
|
remote = std::make_unique<RemoteBlockOutputStream>(*connection, timeouts,
|
||||||
|
header.insert_query, header.insert_settings, header.client_info);
|
||||||
remote->writePrefix();
|
remote->writePrefix();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -652,10 +655,7 @@ public:
|
|||||||
, block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION)
|
, block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION)
|
||||||
, log{&Poco::Logger::get("DirectoryMonitorBlockInputStream")}
|
, log{&Poco::Logger::get("DirectoryMonitorBlockInputStream")}
|
||||||
{
|
{
|
||||||
Settings insert_settings;
|
readDistributedHeader(in, log);
|
||||||
String insert_query;
|
|
||||||
ClientInfo client_info;
|
|
||||||
StorageDistributedDirectoryMonitor::readHeader(in, insert_settings, insert_query, client_info, log);
|
|
||||||
|
|
||||||
block_in.readPrefix();
|
block_in.readPrefix();
|
||||||
first_block = block_in.read();
|
first_block = block_in.read();
|
||||||
@ -744,14 +744,12 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
|||||||
size_t total_rows = 0;
|
size_t total_rows = 0;
|
||||||
size_t total_bytes = 0;
|
size_t total_bytes = 0;
|
||||||
Block sample_block;
|
Block sample_block;
|
||||||
Settings insert_settings;
|
DistributedHeader header;
|
||||||
String insert_query;
|
|
||||||
ClientInfo client_info;
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
/// Determine metadata of the current file and check if it is not broken.
|
/// Determine metadata of the current file and check if it is not broken.
|
||||||
ReadBufferFromFile in{file_path};
|
ReadBufferFromFile in{file_path};
|
||||||
readHeader(in, insert_settings, insert_query, client_info, log);
|
header = readDistributedHeader(in, log);
|
||||||
|
|
||||||
CompressedReadBuffer decompressing_in(in);
|
CompressedReadBuffer decompressing_in(in);
|
||||||
NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
|
NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
|
||||||
@ -778,7 +776,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
|||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
BatchHeader batch_header(std::move(insert_settings), std::move(insert_query), std::move(client_info), std::move(sample_block));
|
BatchHeader batch_header(std::move(header.insert_settings), std::move(header.insert_query), std::move(header.client_info), std::move(sample_block));
|
||||||
Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second;
|
Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second;
|
||||||
|
|
||||||
batch.file_indices.push_back(file_idx);
|
batch.file_indices.push_back(file_idx);
|
||||||
|
@ -111,9 +111,6 @@ private:
|
|||||||
|
|
||||||
CurrentMetrics::Increment metric_pending_files;
|
CurrentMetrics::Increment metric_pending_files;
|
||||||
|
|
||||||
/// Read insert query and insert settings for backward compatible.
|
|
||||||
static void readHeader(ReadBuffer & in, Settings & insert_settings, std::string & insert_query, ClientInfo & client_info, Poco::Logger * log);
|
|
||||||
|
|
||||||
friend class DirectoryMonitorBlockInputStream;
|
friend class DirectoryMonitorBlockInputStream;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user