From 9412b5debd61f7854068fb715f716449508ded30 Mon Sep 17 00:00:00 2001
From: Igor Markelov
Date: Sun, 26 May 2024 19:23:17 +0000
Subject: [PATCH] Add estimateCardinalityInPermutedRange for ColumnVector

---
 src/Columns/ColumnVector.cpp | 20 ++++++++++++++++++++
 src/Columns/ColumnVector.h   |  2 ++
 2 files changed, 22 insertions(+)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 4e3b9963107..498b9cb7c32 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -413,6 +414,25 @@ void ColumnVector::updatePermutation(IColumn::PermutationSortDirection direct
     }
 }
 
+template
+size_t ColumnVector::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
+{
+    const size_t range_size = equal_range.size();
+    if (range_size <= 1)
+        return range_size; /// Zero or one element in the range: the size itself is returned and no set is built.
+
+    /// Inserts getDataAt() of every permuted position in [equal_range.from, equal_range.to) into a set and returns the set's size. TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
+    StringHashSet elements;
+    bool inserted = false; /// Passed to emplace() on every iteration; never read afterwards.
+    for (size_t i = equal_range.from; i < equal_range.to; ++i)
+    {
+        size_t id = permutation[i]; /// Index taken from the permutation at position i; used as the getDataAt() argument.
+        StringRef ref = getDataAt(id);
+        elements.emplace(ref, inserted);
+    }
+    return elements.size(); /// Presumably the number of distinct values seen (relies on StringHashSet deduplicating keys) - confirm.
+}
+
 template
 MutableColumnPtr ColumnVector::cloneResized(size_t size) const
 {
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 91bceaa4534..bbd27c91a70 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -161,6 +161,8 @@ public:
     void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
                     size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
 
+    size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
+
     void reserve(size_t n) override
     {
         data.reserve_exact(n);