diff --git a/CHANGELOG.md b/CHANGELOG.md index 90285582b4e..dacee73440f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -488,6 +488,7 @@ * Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)). * Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). +* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)). #### New Feature * Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). @@ -599,7 +600,6 @@ * Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)). * Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)). * Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). -* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)). * Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)). * Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)). diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0774d36462d..7b9f670d340 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -746,6 +746,12 @@ The server successfully detected this situation and will download merged part fr M(ReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side.", ValueType::Microseconds) \ M(MergeTreeReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.", ValueType::Microseconds) \ M(MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds, "Time spent in sending the announcement from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.", ValueType::Microseconds) \ + M(MergerMutatorsGetPartsForMergeElapsedMicroseconds, "Time spent to take data parts snapshot to build ranges from them.", ValueType::Microseconds) \ + M(MergerMutatorPrepareRangesForMergeElapsedMicroseconds, "Time spent to prepare parts ranges which can be merged according to merge predicate.", ValueType::Microseconds) \ + M(MergerMutatorSelectPartsForMergeElapsedMicroseconds, "Time spent to select parts from ranges which can be merged.", ValueType::Microseconds) \ + M(MergerMutatorRangesForMergeCount, "Amount of candidate ranges for merge", ValueType::Number) \ + M(MergerMutatorPartsInRangesForMergeCount, "Amount of candidate parts for merge", ValueType::Number) \ + M(MergerMutatorSelectRangePartsCount, "Amount of parts in selected range for merge", ValueType::Number) \ \ M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.", ValueType::Microseconds) \ M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.", ValueType::Microseconds) \ diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 3edad3d5f5e..1f806e9c1e5 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -790,6 +790,7 @@ bool CachedOnDiskReadBufferFromFile::writeCache(char * data, size_t size, size_t LOG_INFO(log, "Insert into cache is skipped due to insufficient disk space. ({})", e.displayText()); return false; } + chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); throw; } diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index 08776ad5aee..f5d5fdec6ba 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -83,7 +83,8 @@ void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & loc queue_iterator->invalidate(); chassert(candidate->releasable()); - candidate->file_segment->resetQueueIterator(); + candidate->file_segment->markDelayedRemovalAndResetQueueIterator(); + /// We need to set removed flag in file segment metadata, /// because in dynamic cache resize we first remove queue entries, /// then evict which also removes file segment metadata, diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 5e42bf0113a..541f0f5607a 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -172,10 +172,11 @@ void FileSegment::setQueueIterator(Priority::IteratorPtr iterator) queue_iterator = iterator; } -void FileSegment::resetQueueIterator() +void FileSegment::markDelayedRemovalAndResetQueueIterator() { auto lk = lock(); - queue_iterator.reset(); + on_delayed_removal = true; + queue_iterator = {}; } size_t FileSegment::getCurrentWriteOffset() const @@ -701,6 +702,8 @@ void FileSegment::complete(bool allow_background_download) case State::PARTIALLY_DOWNLOADED: { chassert(current_downloaded_size > 0); + chassert(fs::exists(getPath())); + chassert(fs::file_size(getPath()) > 0); if (is_last_holder) { @@ -843,29 +846,60 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) } } - if (download_state == State::DOWNLOADED) + switch (download_state.load()) { - chassert(downloader_id.empty()); - chassert(downloaded_size == reserved_size); - chassert(downloaded_size == range().size()); - chassert(downloaded_size > 0); - chassert(std::filesystem::file_size(getPath()) > 0); - check_iterator(queue_iterator); - } - else - { - if (download_state == State::DOWNLOADING) - { - chassert(!downloader_id.empty()); - } - else if (download_state == State::PARTIALLY_DOWNLOADED - || download_state == State::EMPTY) + case State::EMPTY: { chassert(downloader_id.empty()); + chassert(!fs::exists(getPath())); + chassert(!queue_iterator); + break; } + case State::DOWNLOADED: + { + chassert(downloader_id.empty()); - chassert(reserved_size >= downloaded_size); - check_iterator(queue_iterator); + chassert(downloaded_size == reserved_size); + chassert(downloaded_size == range().size()); + chassert(downloaded_size > 0); + chassert(fs::file_size(getPath()) > 0); + + chassert(queue_iterator || on_delayed_removal); + check_iterator(queue_iterator); + break; + } + case State::DOWNLOADING: + { + chassert(!downloader_id.empty()); + if (downloaded_size) + { + chassert(queue_iterator); + chassert(fs::file_size(getPath()) > 0); + } + break; + } + case State::PARTIALLY_DOWNLOADED: + { + chassert(downloader_id.empty()); + + chassert(reserved_size >= downloaded_size); + chassert(downloaded_size > 0); + chassert(fs::file_size(getPath()) > 0); + + chassert(queue_iterator); + check_iterator(queue_iterator); + break; + } + case State::PARTIALLY_DOWNLOADED_NO_CONTINUATION: + { + chassert(reserved_size >= downloaded_size); + check_iterator(queue_iterator); + break; + } + case State::DETACHED: + { + break; + } } return true; @@ -993,7 +1027,12 @@ FileSegmentsHolder::FileSegmentsHolder(FileSegments && file_segments_) FileSegmentPtr FileSegmentsHolder::getSingleFileSegment() const { if (file_segments.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single file segment, got: {} in holder {}", file_segments.size(), toString()); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected single file segment, got: {} in holder {}", + file_segments.size(), toString()); + } return file_segments.front(); } @@ -1004,12 +1043,21 @@ void FileSegmentsHolder::reset() ProfileEvents::increment(ProfileEvents::FilesystemCacheUnusedHoldFileSegments, file_segments.size()); for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) { - /// One might think it would have been more correct to do `false` here, - /// not to allow background download for file segments that we actually did not start reading. - /// But actually we would only do that, if those file segments were already read partially by some other thread/query - /// but they were not put to the download queue, because current thread was holding them in Holder. - /// So as a culprit, we need to allow to happen what would have happened if we did not exist. - file_segment_it = completeAndPopFrontImpl(true); + try + { + /// One might think it would have been more correct to do `false` here, + /// not to allow background download for file segments that we actually did not start reading. + /// But actually we would only do that, if those file segments were already read partially by some other thread/query + /// but they were not put to the download queue, because current thread was holding them in Holder. + /// So as a culprit, we need to allow to happen what would have happened if we did not exist. + file_segment_it = completeAndPopFrontImpl(true); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + continue; + } } file_segments.clear(); } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 9d796111659..21d5f9dab5f 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -177,7 +177,7 @@ public: void setQueueIterator(Priority::IteratorPtr iterator); - void resetQueueIterator(); + void markDelayedRemovalAndResetQueueIterator(); KeyMetadataPtr tryGetKeyMetadata() const; @@ -249,12 +249,13 @@ private: String tryGetPath() const; - Key file_key; + const Key file_key; Range segment_range; const FileSegmentKind segment_kind; /// Size of the segment is not known until it is downloaded and /// can be bigger than max_file_segment_size. - const bool is_unbound = false; + /// is_unbound == true for temporary data in cache. + const bool is_unbound; const bool background_download_enabled; std::atomic download_state; @@ -279,6 +280,8 @@ private: std::atomic hits_count = 0; /// cache hits. std::atomic ref_count = 0; /// Used for getting snapshot state + bool on_delayed_removal = false; + CurrentMetrics::Increment metric_increment{CurrentMetrics::CacheFileSegments}; }; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 99ea01aa4f1..231545212cd 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -940,7 +940,16 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( if (file_segment->queue_iterator && invalidate_queue_entry) file_segment->queue_iterator->invalidate(); - file_segment->detach(segment_lock, *this); + try + { + file_segment->detach(segment_lock, *this); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + /// Do not rethrow, we must delete the file below. + } try { @@ -990,8 +999,8 @@ void LockedKey::shrinkFileSegmentToDownloadedSize( * because of no space left in cache, we need to be able to cut file segment's size to downloaded_size. */ - auto metadata = getByOffset(offset); - const auto & file_segment = metadata->file_segment; + auto file_segment_metadata = getByOffset(offset); + const auto & file_segment = file_segment_metadata->file_segment; chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); const size_t downloaded_size = file_segment->getDownloadedSize(); @@ -1006,15 +1015,15 @@ void LockedKey::shrinkFileSegmentToDownloadedSize( chassert(file_segment->reserved_size >= downloaded_size); int64_t diff = file_segment->reserved_size - downloaded_size; - metadata->file_segment = std::make_shared( + file_segment_metadata->file_segment = std::make_shared( getKey(), offset, downloaded_size, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings(file_segment->getKind()), false, file_segment->cache, key_metadata, file_segment->queue_iterator); if (diff) - metadata->getQueueIterator()->decrementSize(diff); + file_segment_metadata->getQueueIterator()->decrementSize(diff); - chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); + chassert(file_segment_metadata->file_segment->assertCorrectnessUnlocked(segment_lock)); } bool LockedKey::addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock &) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 99a748bd42c..06d178ab2b8 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -49,6 +49,16 @@ namespace CurrentMetrics { extern const Metric BackgroundMergesAndMutationsPoolTask; } +namespace ProfileEvents +{ + + extern const Event MergerMutatorsGetPartsForMergeElapsedMicroseconds; + extern const Event MergerMutatorPrepareRangesForMergeElapsedMicroseconds; + extern const Event MergerMutatorSelectPartsForMergeElapsedMicroseconds; + extern const Event MergerMutatorRangesForMergeCount; + extern const Event MergerMutatorPartsInRangesForMergeCount; + extern const Event MergerMutatorSelectRangePartsCount; +} namespace DB { @@ -216,6 +226,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart { PartitionIdsHint res; MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn); + if (data_parts.empty()) return res; @@ -273,6 +284,8 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::getDataPartsToSelectMergeFrom( const MergeTreeTransactionPtr & txn, const PartitionIdsHint * partitions_hint) const { + + Stopwatch get_data_parts_for_merge_timer; auto res = getDataPartsToSelectMergeFrom(txn); if (!partitions_hint) return res; @@ -281,6 +294,8 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::getDataPartsToSelectM { return !partitions_hint->contains(part->info.partition_id); }); + + ProfileEvents::increment(ProfileEvents::MergerMutatorsGetPartsForMergeElapsedMicroseconds, get_data_parts_for_merge_timer.elapsedMicroseconds()); return res; } @@ -358,6 +373,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo const MergeTreeTransactionPtr & txn, PreformattedMessage & out_disable_reason) const { + Stopwatch ranges_for_merge_timer; MergeSelectingInfo res; res.current_time = std::time(nullptr); @@ -462,6 +478,9 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo std::reverse(range.begin(), range.end()); std::reverse(res.parts_ranges.begin(), res.parts_ranges.end()); + ProfileEvents::increment(ProfileEvents::MergerMutatorPartsInRangesForMergeCount, res.parts_selected_precondition); + ProfileEvents::increment(ProfileEvents::MergerMutatorRangesForMergeCount, res.parts_ranges.size()); + ProfileEvents::increment(ProfileEvents::MergerMutatorPrepareRangesForMergeElapsedMicroseconds, ranges_for_merge_timer.elapsedMicroseconds()); return res; } @@ -477,6 +496,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( PreformattedMessage & out_disable_reason, bool dry_run) { + Stopwatch select_parts_from_ranges_timer; const auto data_settings = data.getSettings(); IMergeSelector::PartsRange parts_to_merge; @@ -576,7 +596,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( if (parts_to_merge.empty()) { - out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors)"); + ProfileEvents::increment(ProfileEvents::MergerMutatorSelectPartsForMergeElapsedMicroseconds, select_parts_from_ranges_timer.elapsedMicroseconds()); + out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors) in {}ms", select_parts_from_ranges_timer.elapsedMicroseconds() / 1000); return SelectPartsDecision::CANNOT_SELECT; } } @@ -589,8 +610,11 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( parts.push_back(part); } - LOG_DEBUG(log, "Selected {} parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name); + LOG_DEBUG(log, "Selected {} parts from {} to {} in {}ms", parts.size(), parts.front()->name, parts.back()->name, select_parts_from_ranges_timer.elapsedMicroseconds() / 1000); + ProfileEvents::increment(ProfileEvents::MergerMutatorSelectRangePartsCount, parts.size()); + future_part->assign(std::move(parts)); + ProfileEvents::increment(ProfileEvents::MergerMutatorSelectPartsForMergeElapsedMicroseconds, select_parts_from_ranges_timer.elapsedMicroseconds()); return SelectPartsDecision::SELECTED; }