2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/MergeTree/DataPartsExchange.h>
|
2020-04-29 17:14:49 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
|
|
|
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
2020-05-09 21:24:15 +00:00
|
|
|
#include <Disks/createVolume.h>
|
2020-06-03 13:27:54 +00:00
|
|
|
#include <Disks/SingleDiskVolume.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/CurrentMetrics.h>
|
|
|
|
#include <Common/NetException.h>
|
2020-06-26 21:55:48 +00:00
|
|
|
#include <Common/FileSyncGuard.h>
|
2020-04-29 17:14:49 +00:00
|
|
|
#include <DataStreams/NativeBlockOutputStream.h>
|
2018-06-14 21:20:39 +00:00
|
|
|
#include <IO/HTTPCommon.h>
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/scope_guard.h>
|
2020-02-02 02:49:36 +00:00
|
|
|
#include <Poco/File.h>
|
2017-04-06 13:03:23 +00:00
|
|
|
#include <Poco/Net/HTTPServerResponse.h>
|
2017-04-11 14:13:19 +00:00
|
|
|
#include <Poco/Net/HTTPRequest.h>
|
2020-10-27 12:47:42 +00:00
|
|
|
#include <Storages/MergeTree/ReplicatedFetchList.h>
|
2014-07-22 13:49:52 +00:00
|
|
|
|
|
|
|
|
2016-10-24 04:06:27 +00:00
|
|
|
namespace CurrentMetrics
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const Metric ReplicatedSend;
|
2016-10-24 04:06:27 +00:00
|
|
|
}
|
|
|
|
|
2014-07-22 13:49:52 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2020-02-25 18:02:41 +00:00
|
|
|
extern const int DIRECTORY_ALREADY_EXISTS;
|
|
|
|
extern const int NO_SUCH_DATA_PART;
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const int ABORTED;
|
|
|
|
extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
|
2017-08-09 13:31:13 +00:00
|
|
|
extern const int CANNOT_WRITE_TO_OSTREAM;
|
2018-11-22 21:19:58 +00:00
|
|
|
extern const int CHECKSUM_DOESNT_MATCH;
|
2019-07-31 18:21:13 +00:00
|
|
|
extern const int INSECURE_PATH;
|
2020-05-05 01:27:31 +00:00
|
|
|
extern const int CORRUPTED_DATA;
|
|
|
|
extern const int LOGICAL_ERROR;
|
2016-01-11 21:46:36 +00:00
|
|
|
}
|
|
|
|
|
2016-01-28 01:00:27 +00:00
|
|
|
namespace DataPartsExchange
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
2020-03-09 01:22:33 +00:00
|
|
|
/// Versions of the interserver part exchange protocol.
/// Both sides announce the version they support and the sender answers with the minimum of the two.

/// The sender writes the total size of the part on disk before the part itself.
constexpr int REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE = 1;

/// The sender additionally writes the serialized TTL infos of the part.
constexpr int REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS = 2;

/// The sender additionally writes the type of the part as a string.
constexpr int REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE = 3;

/// The sender may include the default compression codec file into the list of transferred files.
constexpr int REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION = 4;
|
2020-01-30 10:21:40 +00:00
|
|
|
|
2019-09-04 16:00:20 +00:00
|
|
|
|
2016-01-28 01:00:27 +00:00
|
|
|
/// Builds the name of the interserver endpoint for the replica identified by `node_id`:
/// the fixed prefix "DataPartsExchange:" followed by the identifier as is.
std::string getEndpointId(const std::string & node_id)
{
    std::string endpoint_id = "DataPartsExchange:";
    endpoint_id += node_id;
    return endpoint_id;
}
|
|
|
|
|
2020-10-27 13:09:14 +00:00
|
|
|
/// Simple functor for tracking fetch progress in system.replicated_fetches table.
|
2020-10-26 16:38:35 +00:00
|
|
|
struct ReplicatedFetchReadCallback
|
|
|
|
{
|
|
|
|
ReplicatedFetchList::Entry & replicated_fetch_entry;
|
|
|
|
|
2020-10-27 15:29:06 +00:00
|
|
|
explicit ReplicatedFetchReadCallback(ReplicatedFetchList::Entry & replicated_fetch_entry_)
|
2020-10-26 16:38:35 +00:00
|
|
|
: replicated_fetch_entry(replicated_fetch_entry_)
|
|
|
|
{}
|
|
|
|
|
|
|
|
void operator() (size_t bytes_count)
|
|
|
|
{
|
2020-10-27 12:24:10 +00:00
|
|
|
replicated_fetch_entry->bytes_read_compressed.store(bytes_count, std::memory_order_relaxed);
|
2020-10-30 08:52:11 +00:00
|
|
|
|
|
|
|
/// It's possible when we fetch part from very old clickhouse version
|
|
|
|
/// which doesn't send total size.
|
|
|
|
if (replicated_fetch_entry->total_size_bytes_compressed != 0)
|
|
|
|
{
|
|
|
|
replicated_fetch_entry->progress.store(
|
|
|
|
static_cast<double>(bytes_count) / replicated_fetch_entry->total_size_bytes_compressed,
|
|
|
|
std::memory_order_relaxed);
|
|
|
|
}
|
2020-10-26 16:38:35 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-01-28 01:00:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string Service::getId(const std::string & node_id) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
return getEndpointId(node_id);
|
2016-01-28 01:00:27 +00:00
|
|
|
}
|
2016-01-11 21:46:36 +00:00
|
|
|
|
2017-12-01 21:40:58 +00:00
|
|
|
/// Serves one interserver request for a data part.
/// Validates the requested part name, enforces the limits on concurrent sends, answers with the
/// negotiated protocol version in a cookie and then writes the part to `out`. Which optional fields
/// precede the part body depends on the protocol version announced by the client.
/// On failure the part is reported via data.reportBrokenPart() unless the error is a network error,
/// a cancellation or a failed write to the output stream; the exception is always rethrown.
void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*body*/, WriteBuffer & out, Poco::Net::HTTPServerResponse & response)
{
    /// A client that does not send the parameter is treated as speaking version 0.
    int client_protocol_version = parse<int>(params.get("client_protocol_version", "0"));

    String part_name = params.get("part");

    const auto data_settings = data.getSettings();

    /// Validation of the input that may come from malicious replica.
    /// Only the fact that the name parses matters here, the result is discarded.
    MergeTreePartInfo::fromPartName(part_name, data.format_version);

    /// Function-local static: a single counter of sends in progress for all calls of this method.
    static std::atomic_uint total_sends {0};

    /// A limit equal to zero disables the corresponding check.
    const bool too_many_sends
        = (data_settings->replicated_max_parallel_sends
            && total_sends >= data_settings->replicated_max_parallel_sends)
        || (data_settings->replicated_max_parallel_sends_for_table
            && data.current_table_sends >= data_settings->replicated_max_parallel_sends_for_table);

    if (too_many_sends)
    {
        response.setStatus(std::to_string(HTTP_TOO_MANY_REQUESTS));
        response.setReason("Too many concurrent fetches, try again later");
        response.set("Retry-After", "10");
        response.setChunkedTransferEncoding(false);
        return;
    }

    /// We pretend to work as older server version, to be sure that client will correctly process our version
    const int server_protocol_version = std::min(client_protocol_version, REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION);
    response.addCookie({"server_protocol_version", toString(server_protocol_version)});

    /// Both counters are decremented again on every way out of the function.
    ++total_sends;
    SCOPE_EXIT({--total_sends;});

    ++data.current_table_sends;
    SCOPE_EXIT({--data.current_table_sends;});

    LOG_TRACE(log, "Sending part {}", part_name);

    try
    {
        MergeTreeData::DataPartPtr part = findPart(part_name);

        CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedSend};

        /// Optional header fields, each one gated by the protocol version of the client.
        if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE)
            writeBinary(part->checksums.getTotalSizeOnDisk(), out);

        if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS)
        {
            /// TTL infos are serialized into a string first and sent as a single binary string.
            WriteBufferFromOwnString ttl_infos_buffer;
            part->ttl_infos.write(ttl_infos_buffer);
            writeBinary(ttl_infos_buffer.str(), out);
        }

        if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
            writeStringBinary(part->getType().toString(), out);

        if (!isInMemoryPart(part))
        {
            const bool send_default_compression_file = client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION;
            sendPartFromDisk(part, out, send_default_compression_file);
        }
        else
            sendPartFromMemory(part, out);
    }
    catch (const NetException &)
    {
        /// Network error or error on remote side. No need to enqueue part for check.
        throw;
    }
    catch (const Exception & e)
    {
        /// These two codes are excluded from reporting the part as broken.
        const bool skip_report = e.code() == ErrorCodes::ABORTED || e.code() == ErrorCodes::CANNOT_WRITE_TO_OSTREAM;
        if (!skip_report)
            data.reportBrokenPart(part_name);
        throw;
    }
    catch (...)
    {
        data.reportBrokenPart(part_name);
        throw;
    }
}
|
|
|
|
|
2020-06-03 13:27:54 +00:00
|
|
|
/// Sends a part that is stored in memory: first its checksums, then its single block in Native format.
/// Throws LOGICAL_ERROR if `part` is not actually an in-memory part.
void Service::sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out)
{
    auto in_memory_part = asInMemoryPart(part);
    if (!in_memory_part)
        throw Exception("Part " + part->name + " is not stored in memory", ErrorCodes::LOGICAL_ERROR);

    auto metadata_snapshot = data.getInMemoryMetadataPtr();

    /// The block stream is created with the sample block of the table, but nothing is written
    /// through it until the checksums have been sent.
    NativeBlockOutputStream native_out(out, 0, metadata_snapshot->getSampleBlock());

    part->checksums.write(out);
    native_out.write(in_memory_part->block);
}
|
|
|
|
|
2020-08-26 15:29:46 +00:00
|
|
|
/// Sends a part stored on disk as a sequence of files.
/// Wire format: the number of files, then for every file its name, size, contents and hash of the contents.
/// Hashes of the files that are listed in the part checksums are collected while sending and
/// compared with the checksums of the part at the end.
/// `send_default_compression_file` tells whether the default compression codec file may be sent.
void Service::sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_default_compression_file)
{
    /// The list of files to send is taken from a copy of the part checksums ...
    MergeTreeData::DataPart::Checksums files_to_send = part->checksums;

    /// ... extended with the files that are not in the checksum list.
    auto file_names_without_checksums = part->getFileNamesWithoutChecksums();
    for (const auto & extra_file_name : file_names_without_checksums)
    {
        const bool may_send = send_default_compression_file || extra_file_name != IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME;
        if (may_send)
            files_to_send.files[extra_file_name] = {};
    }

    auto disk = part->volume->getDisk();

    /// Checksums computed from the bytes that were really sent.
    MergeTreeData::DataPart::Checksums sent_checksums;

    writeBinary(files_to_send.files.size(), out);
    for (const auto & file_entry : files_to_send.files)
    {
        const String & file_name = file_entry.first;

        String file_path = part->getFullRelativePath() + file_name;

        UInt64 expected_size = disk->getFileSize(file_path);

        writeStringBinary(file_name, out);
        writeBinary(expected_size, out);

        auto file_in = disk->readFile(file_path);
        /// The hashing buffer wraps `out`, so the statements below must stay in this order.
        HashingWriteBuffer hashing_out(out);
        copyData(*file_in, hashing_out, blocker.getCounter());

        if (blocker.isCancelled())
            throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED);

        /// The file has been changed between the size request and the end of reading.
        if (hashing_out.count() != expected_size)
            throw Exception("Unexpected size of file " + file_path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);

        writePODBinary(hashing_out.getHash(), out);

        /// Files without checksums have nothing to be compared with.
        if (!file_names_without_checksums.count(file_name))
            sent_checksums.addFile(file_name, hashing_out.count(), hashing_out.getHash());
    }

    part->checksums.checkEqual(sent_checksums, false);
}
|
|
|
|
|
2016-01-28 01:00:27 +00:00
|
|
|
/// Looks up the part with the given name among the parts in the PreCommitted, Committed and Outdated states.
/// Throws NO_SUCH_DATA_PART if there is no such part.
MergeTreeData::DataPartPtr Service::findPart(const String & name)
{
    /// It is important to include PreCommitted and Outdated parts here because remote replicas cannot reliably
    /// determine the local state of the part, so queries for the parts in these states are completely normal.
    auto found_part = data.getPartIfExists(
        name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});

    if (!found_part)
        throw Exception("No part " + name + " in table", ErrorCodes::NO_SUCH_DATA_PART);

    return found_part;
}
|
|
|
|
|
|
|
|
/// Downloads the part `part_name` from the replica `replica_path` reachable at `host`:`port`.
/// Sends an HTTP POST request to the DataPartsExchange endpoint of the replica, reads the header fields
/// that the negotiated protocol version provides (total size, TTL infos, part type), reserves space
/// accordingly, registers the fetch in the list of replicated fetches and then delegates to
/// downloadPartToMemory() or downloadPartToDisk() depending on the part type.
/// Throws ABORTED if fetches are cancelled at the moment of the call.
MergeTreeData::MutableDataPartPtr Fetcher::fetchPart(
    const StorageMetadataPtr & metadata_snapshot,
    const String & part_name,
    const String & replica_path,
    const String & host,
    int port,
    const ConnectionTimeouts & timeouts,
    const String & user,
    const String & password,
    const String & interserver_scheme,
    bool to_detached,
    const String & tmp_prefix_)
{
    if (blocker.isCancelled())
        throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED);

    /// Validation of the input that may come from malicious replica.
    auto part_info = MergeTreePartInfo::fromPartName(part_name, data.format_version);
    const auto data_settings = data.getSettings();

    Poco::URI uri;
    uri.setScheme(interserver_scheme);
    uri.setHost(host);
    uri.setPort(port);
    uri.setQueryParameters(
    {
        {"endpoint",                getEndpointId(replica_path)},
        {"part",                    part_name},
        {"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION)},
        {"compress",                "false"}
    });

    /// Credentials stay empty when no user is configured.
    Poco::Net::HTTPBasicCredentials creds{};
    if (!user.empty())
    {
        creds.setUsername(user);
        creds.setPassword(password);
    }

    PooledReadWriteBufferFromHTTP in{
        uri,
        Poco::Net::HTTPRequest::HTTP_POST,
        {},
        timeouts,
        creds,
        DBMS_DEFAULT_BUFFER_SIZE,
        0, /* no redirects */
        data_settings->replicated_max_parallel_fetches_for_host
    };

    /// A server that does not set the cookie is treated as speaking version 0.
    int server_protocol_version = parse<int>(in.getResponseCookie("server_protocol_version", "0"));

    ReservationPtr reservation;
    size_t sum_files_size = 0;
    if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE)
    {
        /// We don't know real size of part because sender server version is too old
        reservation = data.makeEmptyReservationOnLargestDisk();
    }
    else
    {
        readBinary(sum_files_size, in);

        if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS)
        {
            reservation = data.reserveSpace(sum_files_size);
        }
        else
        {
            /// TTL infos arrive as a binary string which starts with the format version line.
            IMergeTreeDataPart::TTLInfos ttl_infos;
            String ttl_infos_string;
            readBinary(ttl_infos_string, in);
            ReadBufferFromString ttl_infos_buffer(ttl_infos_string);
            assertString("ttl format version: 1\n", ttl_infos_buffer);
            ttl_infos.read(ttl_infos_buffer);
            reservation = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true);
        }
    }

    /// fsync of the fetched files is requested only if the setting is non-zero and the part is large enough.
    /// The size stays zero for old servers which don't send it.
    bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch
        && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch);

    /// Servers that don't send the part type are assumed to send a Wide part.
    String part_type = "Wide";
    if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
        readStringBinary(part_type, in);

    const bool fetch_to_memory = part_type == "InMemory";

    auto storage_id = data.getStorageID();

    String new_part_path;
    if (fetch_to_memory)
        new_part_path = "memory";
    else
        new_part_path = data.getFullPathOnDisk(reservation->getDisk()) + part_name + "/";

    /// The entry lives until the end of this function, i.e. for the whole duration of the download.
    auto entry = data.global_context.getReplicatedFetchList().insert(
        storage_id.getDatabaseName(), storage_id.getTableName(),
        part_info.partition_id, part_name, new_part_path,
        replica_path, uri, to_detached, sum_files_size);

    in.setNextCallback(ReplicatedFetchReadCallback(*entry));

    if (fetch_to_memory)
        return downloadPartToMemory(part_name, metadata_snapshot, std::move(reservation), in);

    return downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, std::move(reservation), in);
}
|
|
|
|
|
|
|
|
/// Reads an in-memory part from `in`: the checksums followed by one block in Native format,
/// and builds a temporary MergeTreeDataPartInMemory from it on the disk of `reservation`.
/// Throws CORRUPTED_DATA if the checksums cannot be deserialized. The checksums of the created
/// part are compared with the received ones before the part is returned.
MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
    const String & part_name,
    const StorageMetadataPtr & metadata_snapshot,
    ReservationPtr reservation,
    PooledReadWriteBufferFromHTTP & in)
{
    MergeTreeData::DataPart::Checksums received_checksums;
    if (!received_checksums.read(in))
        throw Exception("Cannot deserialize checksums", ErrorCodes::CORRUPTED_DATA);

    NativeBlockInputStream native_in(in, 0);
    auto received_block = native_in.read();

    auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + part_name, reservation->getDisk(), 0);

    MergeTreeData::MutableDataPartPtr new_data_part =
        std::make_shared<MergeTreeDataPartInMemory>(data, part_name, single_disk_volume);

    /// Fill in the properties of the part that are derived from the received block.
    new_data_part->is_temp = true;
    new_data_part->setColumns(received_block.getNamesAndTypesList());
    new_data_part->minmax_idx.update(received_block, data.minmax_idx_columns);
    new_data_part->partition.create(metadata_snapshot, received_block, 0);

    /// Write the block into the part. The output stream is created with the "NONE" codec.
    MergedBlockOutputStream part_writer(
        new_data_part,
        metadata_snapshot,
        received_block.getNamesAndTypesList(),
        {},
        CompressionCodecFactory::instance().get("NONE", {}));

    part_writer.writePrefix();
    part_writer.write(received_block);
    part_writer.writeSuffixAndFinalizePart(new_data_part);

    new_data_part->checksums.checkEqual(received_checksums, /* have_uncompressed = */ true);

    return new_data_part;
}
|
|
|
|
|
2020-04-29 17:14:49 +00:00
|
|
|
/// Reads a part from `in` file by file into a temporary directory on the disk of `reservation`
/// and creates a temporary data part from that directory.
/// Wire format: the number of files, then for every file its name, size, contents and hash of the contents.
/// The directory is named `tmp_prefix_` (or "tmp_fetch_" if it is empty) followed by the part name
/// and is placed into "detached/" when `to_detached` is set. If `sync` is set, every file is synced
/// after it has been received.
/// Throws DIRECTORY_ALREADY_EXISTS, INSECURE_PATH, ABORTED or CHECKSUM_DOESNT_MATCH on the corresponding failures.
MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
    const String & part_name,
    const String & replica_path,
    bool to_detached,
    const String & tmp_prefix_,
    bool sync,
    const ReservationPtr reservation,
    PooledReadWriteBufferFromHTTP & in)
{
    size_t files_count;
    readBinary(files_count, in);

    auto disk = reservation->getDisk();

    static const String TMP_PREFIX = "tmp_fetch_";
    String tmp_prefix = tmp_prefix_.empty() ? TMP_PREFIX : tmp_prefix_;

    String part_relative_path = String(to_detached ? "detached/" : "") + tmp_prefix + part_name;
    String part_download_path = data.getRelativeDataPath() + part_relative_path + "/";

    if (disk->exists(part_download_path))
        throw Exception("Directory " + fullPath(disk, part_download_path) + " already exists.", ErrorCodes::DIRECTORY_ALREADY_EXISTS);

    disk->createDirectories(part_download_path);

    /// The guard is created only when fsync of the part directory is enabled in the settings.
    std::optional<FileSyncGuard> sync_guard;
    if (data.getSettings()->fsync_part_directory)
        sync_guard.emplace(disk, part_download_path);

    /// Checksums of the received files, to be compared with the ones loaded from the part later.
    MergeTreeData::DataPart::Checksums received_checksums;
    for (size_t file_index = 0; file_index < files_count; ++file_index)
    {
        String file_name;
        UInt64 file_size;

        readStringBinary(file_name, in);
        readBinary(file_size, in);

        /// File must be inside the directory the part is downloaded to.
        /// Otherwise malicious ClickHouse replica may force us to write to arbitrary path.
        String absolute_file_path = Poco::Path(part_download_path + file_name).absolute().toString();
        if (!startsWith(absolute_file_path, Poco::Path(part_download_path).absolute().toString()))
            throw Exception("File path (" + absolute_file_path + ") doesn't appear to be inside part path (" + part_download_path + ")."
                " This may happen if we are trying to download part from malicious replica or logical error.",
                ErrorCodes::INSECURE_PATH);

        auto file_out = disk->writeFile(part_download_path + file_name);
        HashingWriteBuffer hashing_out(*file_out);
        copyData(in, hashing_out, file_size, blocker.getCounter());

        if (blocker.isCancelled())
        {
            /// NOTE The is_cancelled flag also makes sense to check every time you read over the network,
            /// performing a poll with a not very large timeout.
            /// And now we check it only between read chunks (in the `copyData` function).
            disk->removeRecursive(part_download_path);
            throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED);
        }

        /// The sender writes the hash of the contents right after the contents.
        MergeTreeDataPartChecksum::uint128 expected_hash;
        readPODBinary(expected_hash, in);

        if (expected_hash != hashing_out.getHash())
            throw Exception("Checksum mismatch for file " + fullPath(disk, part_download_path + file_name) + " transferred from " + replica_path,
                ErrorCodes::CHECKSUM_DOESNT_MATCH);

        /// These three files are not added to the set of checksums compared below.
        const bool excluded_from_checksums = file_name == "checksums.txt"
            || file_name == "columns.txt"
            || file_name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME;

        if (!excluded_from_checksums)
            received_checksums.addFile(file_name, file_size, expected_hash);

        if (sync)
            hashing_out.sync();
    }

    /// Nothing is expected in the stream after the last file.
    assertEOF(in);

    auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + part_name, disk, 0);
    MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, single_disk_volume, part_relative_path);
    new_data_part->is_temp = true;
    new_data_part->modification_time = time(nullptr);
    new_data_part->loadColumnsChecksumsIndexes(true, false);
    new_data_part->checksums.checkEqual(received_checksums, false);

    return new_data_part;
}
|
|
|
|
|
|
|
|
}
|
2016-01-28 01:00:27 +00:00
|
|
|
|
|
|
|
}
|