Add estimateCardinalityInPermutedRange for ColumnVector

This commit is contained in:
Igor Markelov 2024-05-26 19:23:17 +00:00
parent 9202e46ff5
commit 9412b5debd
2 changed files with 22 additions and 0 deletions

View File

@ -14,6 +14,7 @@
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/NaNUtils.h> #include <Common/NaNUtils.h>
#include <Common/RadixSort.h> #include <Common/RadixSort.h>
#include <Common/SipHash.h> #include <Common/SipHash.h>
@ -413,6 +414,25 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
} }
} }
/// Returns the number of distinct values among the rows addressed by
/// permutation[equal_range.from] ... permutation[equal_range.to - 1].
/// Each value is keyed in the set by the StringRef that getDataAt() returns for its row.
/// When equal_range.size() is 0 or 1 that size is returned directly and no set is built.
template <typename T>
size_t ColumnVector<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
{
    const size_t rows_in_range = equal_range.size();
    if (rows_in_range <= 1)
        return rows_in_range;

    /// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
    StringHashSet distinct_values;

    /// emplace() reports through this flag whether the key was new; the result is not needed here.
    bool was_inserted = false;

    size_t pos = equal_range.from;
    while (pos < equal_range.to)
    {
        const size_t row = permutation[pos];
        StringRef value = getDataAt(row);
        distinct_values.emplace(value, was_inserted);
        ++pos;
    }

    return distinct_values.size();
}
template <typename T> template <typename T>
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
{ {

View File

@ -161,6 +161,8 @@ public:
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override; size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
/// Number of distinct values among the rows permutation[equal_range.from .. equal_range.to).
/// The current implementation inserts every row of the range into a hash set (no sampling yet),
/// and returns equal_range.size() directly when that size is 0 or 1.
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
void reserve(size_t n) override void reserve(size_t n) override
{ {
data.reserve_exact(n); data.reserve_exact(n);