Merge branch 'master' into limit_number_of_ranges

This commit is contained in:
alesapin 2024-11-06 17:30:24 +01:00 committed by GitHub
commit 3f95bade2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 132 additions and 40 deletions

View File

@ -488,6 +488,7 @@
* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)).
* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)).
#### New Feature
* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)).
@ -599,7 +600,6 @@
* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)).
* Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)).
* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)).
* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)).
* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)).

View File

@ -746,6 +746,12 @@ The server successfully detected this situation and will download merged part fr
M(ReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side.", ValueType::Microseconds) \
M(MergeTreeReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.", ValueType::Microseconds) \
M(MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds, "Time spent in sending the announcement from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.", ValueType::Microseconds) \
M(MergerMutatorsGetPartsForMergeElapsedMicroseconds, "Time spent to take data parts snapshot to build ranges from them.", ValueType::Microseconds) \
M(MergerMutatorPrepareRangesForMergeElapsedMicroseconds, "Time spent to prepare parts ranges which can be merged according to merge predicate.", ValueType::Microseconds) \
M(MergerMutatorSelectPartsForMergeElapsedMicroseconds, "Time spent to select parts from ranges which can be merged.", ValueType::Microseconds) \
M(MergerMutatorRangesForMergeCount, "Amount of candidate ranges for merge", ValueType::Number) \
M(MergerMutatorPartsInRangesForMergeCount, "Amount of candidate parts for merge", ValueType::Number) \
M(MergerMutatorSelectRangePartsCount, "Amount of parts in selected range for merge", ValueType::Number) \
\
M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.", ValueType::Microseconds) \
M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.", ValueType::Microseconds) \

View File

@ -790,6 +790,7 @@ bool CachedOnDiskReadBufferFromFile::writeCache(char * data, size_t size, size_t
LOG_INFO(log, "Insert into cache is skipped due to insufficient disk space. ({})", e.displayText());
return false;
}
chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
throw;
}

View File

@ -83,7 +83,8 @@ void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & loc
queue_iterator->invalidate();
chassert(candidate->releasable());
candidate->file_segment->resetQueueIterator();
candidate->file_segment->markDelayedRemovalAndResetQueueIterator();
/// We need to set removed flag in file segment metadata,
/// because in dynamic cache resize we first remove queue entries,
/// then evict which also removes file segment metadata,

View File

@ -172,10 +172,11 @@ void FileSegment::setQueueIterator(Priority::IteratorPtr iterator)
queue_iterator = iterator;
}
void FileSegment::resetQueueIterator()
void FileSegment::markDelayedRemovalAndResetQueueIterator()
{
auto lk = lock();
queue_iterator.reset();
on_delayed_removal = true;
queue_iterator = {};
}
size_t FileSegment::getCurrentWriteOffset() const
@ -701,6 +702,8 @@ void FileSegment::complete(bool allow_background_download)
case State::PARTIALLY_DOWNLOADED:
{
chassert(current_downloaded_size > 0);
chassert(fs::exists(getPath()));
chassert(fs::file_size(getPath()) > 0);
if (is_last_holder)
{
@ -843,29 +846,60 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock)
}
}
if (download_state == State::DOWNLOADED)
switch (download_state.load())
{
chassert(downloader_id.empty());
chassert(downloaded_size == reserved_size);
chassert(downloaded_size == range().size());
chassert(downloaded_size > 0);
chassert(std::filesystem::file_size(getPath()) > 0);
check_iterator(queue_iterator);
}
else
{
if (download_state == State::DOWNLOADING)
{
chassert(!downloader_id.empty());
}
else if (download_state == State::PARTIALLY_DOWNLOADED
|| download_state == State::EMPTY)
case State::EMPTY:
{
chassert(downloader_id.empty());
chassert(!fs::exists(getPath()));
chassert(!queue_iterator);
break;
}
case State::DOWNLOADED:
{
chassert(downloader_id.empty());
chassert(reserved_size >= downloaded_size);
check_iterator(queue_iterator);
chassert(downloaded_size == reserved_size);
chassert(downloaded_size == range().size());
chassert(downloaded_size > 0);
chassert(fs::file_size(getPath()) > 0);
chassert(queue_iterator || on_delayed_removal);
check_iterator(queue_iterator);
break;
}
case State::DOWNLOADING:
{
chassert(!downloader_id.empty());
if (downloaded_size)
{
chassert(queue_iterator);
chassert(fs::file_size(getPath()) > 0);
}
break;
}
case State::PARTIALLY_DOWNLOADED:
{
chassert(downloader_id.empty());
chassert(reserved_size >= downloaded_size);
chassert(downloaded_size > 0);
chassert(fs::file_size(getPath()) > 0);
chassert(queue_iterator);
check_iterator(queue_iterator);
break;
}
case State::PARTIALLY_DOWNLOADED_NO_CONTINUATION:
{
chassert(reserved_size >= downloaded_size);
check_iterator(queue_iterator);
break;
}
case State::DETACHED:
{
break;
}
}
return true;
@ -993,7 +1027,12 @@ FileSegmentsHolder::FileSegmentsHolder(FileSegments && file_segments_)
FileSegmentPtr FileSegmentsHolder::getSingleFileSegment() const
{
if (file_segments.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single file segment, got: {} in holder {}", file_segments.size(), toString());
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Expected single file segment, got: {} in holder {}",
file_segments.size(), toString());
}
return file_segments.front();
}
@ -1004,12 +1043,21 @@ void FileSegmentsHolder::reset()
ProfileEvents::increment(ProfileEvents::FilesystemCacheUnusedHoldFileSegments, file_segments.size());
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
{
/// One might think it would have been more correct to do `false` here,
/// not to allow background download for file segments that we actually did not start reading.
/// But actually we would only do that, if those file segments were already read partially by some other thread/query
/// but they were not put to the download queue, because current thread was holding them in Holder.
/// So as a culprit, we need to allow to happen what would have happened if we did not exist.
file_segment_it = completeAndPopFrontImpl(true);
try
{
/// One might think it would have been more correct to do `false` here,
/// not to allow background download for file segments that we actually did not start reading.
/// But actually we would only do that, if those file segments were already read partially by some other thread/query
/// but they were not put to the download queue, because current thread was holding them in Holder.
/// So as a culprit, we need to allow to happen what would have happened if we did not exist.
file_segment_it = completeAndPopFrontImpl(true);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
continue;
}
}
file_segments.clear();
}

View File

@ -177,7 +177,7 @@ public:
void setQueueIterator(Priority::IteratorPtr iterator);
void resetQueueIterator();
void markDelayedRemovalAndResetQueueIterator();
KeyMetadataPtr tryGetKeyMetadata() const;
@ -249,12 +249,13 @@ private:
String tryGetPath() const;
Key file_key;
const Key file_key;
Range segment_range;
const FileSegmentKind segment_kind;
/// Size of the segment is not known until it is downloaded and
/// can be bigger than max_file_segment_size.
const bool is_unbound = false;
/// is_unbound == true for temporary data in cache.
const bool is_unbound;
const bool background_download_enabled;
std::atomic<State> download_state;
@ -279,6 +280,8 @@ private:
std::atomic<size_t> hits_count = 0; /// cache hits.
std::atomic<size_t> ref_count = 0; /// Used for getting snapshot state
bool on_delayed_removal = false;
CurrentMetrics::Increment metric_increment{CurrentMetrics::CacheFileSegments};
};

View File

@ -940,7 +940,16 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl(
if (file_segment->queue_iterator && invalidate_queue_entry)
file_segment->queue_iterator->invalidate();
file_segment->detach(segment_lock, *this);
try
{
file_segment->detach(segment_lock, *this);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
/// Do not rethrow, we must delete the file below.
}
try
{
@ -990,8 +999,8 @@ void LockedKey::shrinkFileSegmentToDownloadedSize(
* because of no space left in cache, we need to be able to cut file segment's size to downloaded_size.
*/
auto metadata = getByOffset(offset);
const auto & file_segment = metadata->file_segment;
auto file_segment_metadata = getByOffset(offset);
const auto & file_segment = file_segment_metadata->file_segment;
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
const size_t downloaded_size = file_segment->getDownloadedSize();
@ -1006,15 +1015,15 @@ void LockedKey::shrinkFileSegmentToDownloadedSize(
chassert(file_segment->reserved_size >= downloaded_size);
int64_t diff = file_segment->reserved_size - downloaded_size;
metadata->file_segment = std::make_shared<FileSegment>(
file_segment_metadata->file_segment = std::make_shared<FileSegment>(
getKey(), offset, downloaded_size, FileSegment::State::DOWNLOADED,
CreateFileSegmentSettings(file_segment->getKind()), false,
file_segment->cache, key_metadata, file_segment->queue_iterator);
if (diff)
metadata->getQueueIterator()->decrementSize(diff);
file_segment_metadata->getQueueIterator()->decrementSize(diff);
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
chassert(file_segment_metadata->file_segment->assertCorrectnessUnlocked(segment_lock));
}
bool LockedKey::addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock &)

View File

@ -49,6 +49,16 @@ namespace CurrentMetrics
{
extern const Metric BackgroundMergesAndMutationsPoolTask;
}
namespace ProfileEvents
{
extern const Event MergerMutatorsGetPartsForMergeElapsedMicroseconds;
extern const Event MergerMutatorPrepareRangesForMergeElapsedMicroseconds;
extern const Event MergerMutatorSelectPartsForMergeElapsedMicroseconds;
extern const Event MergerMutatorRangesForMergeCount;
extern const Event MergerMutatorPartsInRangesForMergeCount;
extern const Event MergerMutatorSelectRangePartsCount;
}
namespace DB
{
@ -216,6 +226,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart
{
PartitionIdsHint res;
MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn);
if (data_parts.empty())
return res;
@ -273,6 +284,8 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart
MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::getDataPartsToSelectMergeFrom(
const MergeTreeTransactionPtr & txn, const PartitionIdsHint * partitions_hint) const
{
Stopwatch get_data_parts_for_merge_timer;
auto res = getDataPartsToSelectMergeFrom(txn);
if (!partitions_hint)
return res;
@ -281,6 +294,8 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::getDataPartsToSelectM
{
return !partitions_hint->contains(part->info.partition_id);
});
ProfileEvents::increment(ProfileEvents::MergerMutatorsGetPartsForMergeElapsedMicroseconds, get_data_parts_for_merge_timer.elapsedMicroseconds());
return res;
}
@ -358,6 +373,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo
const MergeTreeTransactionPtr & txn,
PreformattedMessage & out_disable_reason) const
{
Stopwatch ranges_for_merge_timer;
MergeSelectingInfo res;
res.current_time = std::time(nullptr);
@ -462,6 +478,9 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo
std::reverse(range.begin(), range.end());
std::reverse(res.parts_ranges.begin(), res.parts_ranges.end());
ProfileEvents::increment(ProfileEvents::MergerMutatorPartsInRangesForMergeCount, res.parts_selected_precondition);
ProfileEvents::increment(ProfileEvents::MergerMutatorRangesForMergeCount, res.parts_ranges.size());
ProfileEvents::increment(ProfileEvents::MergerMutatorPrepareRangesForMergeElapsedMicroseconds, ranges_for_merge_timer.elapsedMicroseconds());
return res;
}
@ -477,6 +496,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
PreformattedMessage & out_disable_reason,
bool dry_run)
{
Stopwatch select_parts_from_ranges_timer;
const auto data_settings = data.getSettings();
IMergeSelector::PartsRange parts_to_merge;
@ -576,7 +596,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
if (parts_to_merge.empty())
{
out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors)");
ProfileEvents::increment(ProfileEvents::MergerMutatorSelectPartsForMergeElapsedMicroseconds, select_parts_from_ranges_timer.elapsedMicroseconds());
out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors) in {}ms", select_parts_from_ranges_timer.elapsedMicroseconds() / 1000);
return SelectPartsDecision::CANNOT_SELECT;
}
}
@ -589,8 +610,11 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
parts.push_back(part);
}
LOG_DEBUG(log, "Selected {} parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name);
LOG_DEBUG(log, "Selected {} parts from {} to {} in {}ms", parts.size(), parts.front()->name, parts.back()->name, select_parts_from_ranges_timer.elapsedMicroseconds() / 1000);
ProfileEvents::increment(ProfileEvents::MergerMutatorSelectRangePartsCount, parts.size());
future_part->assign(std::move(parts));
ProfileEvents::increment(ProfileEvents::MergerMutatorSelectPartsForMergeElapsedMicroseconds, select_parts_from_ranges_timer.elapsedMicroseconds());
return SelectPartsDecision::SELECTED;
}