mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 02:21:59 +00:00
Optimize SingleValueDataNumeric getIndexNotNullIf
This commit is contained in:
parent
3739d46817
commit
a38ca3bca2
@ -34,7 +34,7 @@ mergeIfAndNullFlags(const UInt8 * __restrict null_map, const UInt8 * __restrict
|
||||
|
||||
}
|
||||
|
||||
std::optional<size_t> SingleValueDataBase::getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end)
|
||||
std::optional<size_t> SingleValueDataBase::getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return std::nullopt;
|
||||
@ -59,7 +59,7 @@ std::optional<size_t> SingleValueDataBase::getSmallestIndex(const IColumn & colu
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<size_t> SingleValueDataBase::getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end)
|
||||
std::optional<size_t> SingleValueDataBase::getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return std::nullopt;
|
||||
@ -85,7 +85,7 @@ std::optional<size_t> SingleValueDataBase::getGreatestIndex(const IColumn & colu
|
||||
}
|
||||
|
||||
std::optional<size_t> SingleValueDataBase::getSmallestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end)
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
size_t index = row_begin;
|
||||
while ((index < row_end) && ((if_map && if_map[index] == 0) || (null_map && null_map[index] != 0)))
|
||||
@ -100,7 +100,7 @@ std::optional<size_t> SingleValueDataBase::getSmallestIndexNotNullIf(
|
||||
}
|
||||
|
||||
std::optional<size_t> SingleValueDataBase::getGreatestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end)
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
size_t index = row_begin;
|
||||
while ((index < row_end) && ((if_map && if_map[index] == 0) || (null_map && null_map[index] != 0)))
|
||||
@ -409,7 +409,7 @@ void SingleValueDataFixed<T>::setGreatestNotNullIf(
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataFixed<T>::getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end)
|
||||
std::optional<size_t> SingleValueDataFixed<T>::getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return std::nullopt;
|
||||
@ -430,7 +430,7 @@ std::optional<size_t> SingleValueDataFixed<T>::getSmallestIndex(const IColumn &
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataFixed<T>::getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end)
|
||||
std::optional<size_t> SingleValueDataFixed<T>::getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return std::nullopt;
|
||||
@ -450,6 +450,134 @@ std::optional<size_t> SingleValueDataFixed<T>::getGreatestIndex(const IColumn &
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataFixed<T>::getSmallestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return std::nullopt;
|
||||
|
||||
const auto & vec = assert_cast<const ColVecType &>(column);
|
||||
|
||||
if constexpr (has_find_extreme_implementation<T>)
|
||||
{
|
||||
std::optional<T> opt;
|
||||
if (!if_map)
|
||||
{
|
||||
opt = findExtremeMinNotNull(vec.getData().data(), null_map, row_begin, row_end);
|
||||
if (!opt.has_value())
|
||||
return opt;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (!null_map[i] && vec[i] == *opt)
|
||||
return {i};
|
||||
}
|
||||
}
|
||||
else if (!null_map)
|
||||
{
|
||||
opt = findExtremeMinIf(vec.getData().data(), if_map, row_begin, row_end);
|
||||
if (!opt.has_value())
|
||||
return opt;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (if_map[i] && vec[i] == *opt)
|
||||
return {i};
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto final_flags = mergeIfAndNullFlags(null_map, if_map, row_begin, row_end);
|
||||
opt = findExtremeMinIf(vec.getData().data(), final_flags.get(), row_begin, row_end);
|
||||
if (!opt.has_value())
|
||||
return std::nullopt;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (final_flags[i] && vec[i] == *opt)
|
||||
return {i};
|
||||
}
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t index = row_begin;
|
||||
while ((index < row_end) && ((if_map && if_map[index] == 0) || (null_map && null_map[index] != 0)))
|
||||
index++;
|
||||
if (index >= row_end)
|
||||
return std::nullopt;
|
||||
|
||||
for (size_t i = index + 1; i < row_end; i++)
|
||||
if ((!if_map || if_map[i] != 0) && (!null_map || null_map[i] == 0) && (vec[i] < vec[index]))
|
||||
index = i;
|
||||
return {index};
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataFixed<T>::getGreatestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return std::nullopt;
|
||||
|
||||
const auto & vec = assert_cast<const ColVecType &>(column);
|
||||
|
||||
if constexpr (has_find_extreme_implementation<T>)
|
||||
{
|
||||
std::optional<T> opt;
|
||||
if (!if_map)
|
||||
{
|
||||
opt = findExtremeMaxNotNull(vec.getData().data(), null_map, row_begin, row_end);
|
||||
if (!opt.has_value())
|
||||
return opt;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (!null_map[i] && vec[i] == *opt)
|
||||
return {i};
|
||||
}
|
||||
return opt;
|
||||
}
|
||||
else if (!null_map)
|
||||
{
|
||||
opt = findExtremeMaxIf(vec.getData().data(), if_map, row_begin, row_end);
|
||||
if (!opt.has_value())
|
||||
return opt;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (if_map[i] && vec[i] == *opt)
|
||||
return {i};
|
||||
}
|
||||
return opt;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto final_flags = mergeIfAndNullFlags(null_map, if_map, row_begin, row_end);
|
||||
opt = findExtremeMaxIf(vec.getData().data(), final_flags.get(), row_begin, row_end);
|
||||
if (!opt.has_value())
|
||||
return std::nullopt;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (final_flags[i] && vec[i] == *opt)
|
||||
return {i};
|
||||
}
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t index = row_begin;
|
||||
while ((index < row_end) && ((if_map && if_map[index] == 0) || (null_map && null_map[index] != 0)))
|
||||
index++;
|
||||
if (index >= row_end)
|
||||
return std::nullopt;
|
||||
|
||||
for (size_t i = index + 1; i < row_end; i++)
|
||||
if ((!if_map || if_map[i] != 0) && (!null_map || null_map[i] == 0) && (vec[i] < vec[index]))
|
||||
index = i;
|
||||
return {index};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
@ -864,17 +992,31 @@ void SingleValueDataNumeric<T>::setGreatestNotNullIf(
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataNumeric<T>::getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end)
|
||||
std::optional<size_t> SingleValueDataNumeric<T>::getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
return memory.get().getSmallestIndex(column, row_begin, row_end);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataNumeric<T>::getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end)
|
||||
std::optional<size_t> SingleValueDataNumeric<T>::getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
return memory.get().getGreatestIndex(column, row_begin, row_end);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataNumeric<T>::getSmallestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
return memory.get().getSmallestIndexNotNullIf(column, null_map, if_map, row_begin, row_end);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<size_t> SingleValueDataNumeric<T>::getGreatestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const
|
||||
{
|
||||
return memory.get().getGreatestIndexNotNullIf(column, null_map, if_map, row_begin, row_end);
|
||||
}
|
||||
|
||||
#define DISPATCH(TYPE) template struct SingleValueDataNumeric<TYPE>;
|
||||
|
||||
FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
|
||||
|
@ -59,12 +59,12 @@ struct SingleValueDataBase
|
||||
/// Given a column returns the index of the smallest or greatest value in it
|
||||
/// Doesn't return anything if the column is empty
|
||||
/// These are used to implement argMin / argMax
|
||||
virtual std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end);
|
||||
virtual std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end);
|
||||
static std::optional<size_t> getSmallestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end);
|
||||
static std::optional<size_t> getGreatestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end);
|
||||
virtual std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
|
||||
virtual std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
|
||||
virtual std::optional<size_t> getSmallestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
|
||||
virtual std::optional<size_t> getGreatestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
|
||||
};
|
||||
|
||||
|
||||
@ -136,8 +136,12 @@ struct SingleValueDataFixed
|
||||
size_t row_end,
|
||||
Arena *);
|
||||
|
||||
std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end);
|
||||
std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end);
|
||||
std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
|
||||
std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
|
||||
std::optional<size_t> getSmallestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
|
||||
std::optional<size_t> getGreatestIndexNotNullIf(
|
||||
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
|
||||
|
||||
static bool allocatesMemoryInArena() { return false; }
|
||||
|
||||
@ -241,8 +245,20 @@ public:
|
||||
size_t row_end,
|
||||
Arena * arena) override;
|
||||
|
||||
std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) override;
|
||||
std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) override;
|
||||
std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const override;
|
||||
std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const override;
|
||||
std::optional<size_t> getSmallestIndexNotNullIf(
|
||||
const IColumn & column,
|
||||
const UInt8 * __restrict null_map,
|
||||
const UInt8 * __restrict if_map,
|
||||
size_t row_begin,
|
||||
size_t row_end) const override;
|
||||
std::optional<size_t> getGreatestIndexNotNullIf(
|
||||
const IColumn & column,
|
||||
const UInt8 * __restrict null_map,
|
||||
const UInt8 * __restrict if_map,
|
||||
size_t row_begin,
|
||||
size_t row_end) const override;
|
||||
|
||||
static bool allocatesMemoryInArena() { return false; }
|
||||
};
|
||||
|
@ -2,6 +2,9 @@
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <Common/findExtreme.h>
|
||||
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -67,16 +70,45 @@ MULTITARGET_FUNCTION_AVX2_SSE42(
|
||||
for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++)
|
||||
ret = ComparatorClass::cmp(ret, partial_min[unroll_it]);
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < count; i++)
|
||||
{
|
||||
if (add_all_elements || !condition_map[i] == add_if_cond_zero)
|
||||
ret = ComparatorClass::cmp(ret, ptr[i]);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Only native integers
|
||||
for (; i < count; i++)
|
||||
{
|
||||
constexpr bool is_min = std::same_as<ComparatorClass, MinComparator<T>>;
|
||||
if constexpr (add_all_elements)
|
||||
{
|
||||
ret = ComparatorClass::cmp(ret, ptr[i]);
|
||||
}
|
||||
else if constexpr (is_min)
|
||||
{
|
||||
bool keep_number = add_if_cond_zero ? !condition_map[i] : !!condition_map[i];
|
||||
/// If keep_number:     final = ptr[i] * 1 + 0 * max = ptr[i]
|
||||
/// If not keep_number: final = ptr[i] * 0 + 1 * max = max
|
||||
T final = ptr[i] * T{keep_number} + T{!keep_number} * std::numeric_limits<T>::max();
|
||||
ret = ComparatorClass::cmp(ret, final);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(std::same_as<ComparatorClass, MaxComparator<T>>);
|
||||
bool keep_number = add_if_cond_zero ? !condition_map[i] : !!condition_map[i];
|
||||
/// If keep_number:     final = ptr[i] * 1 + 0 * lowest = ptr[i]
|
||||
/// If not keep_number: final = ptr[i] * 0 + 1 * lowest = lowest
|
||||
T final = ptr[i] * T{keep_number} + T{!keep_number} * std::numeric_limits<T>::lowest();
|
||||
ret = ComparatorClass::cmp(ret, final);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
))
|
||||
|
||||
/// Given a vector of T finds the extreme (MIN or MAX) value
|
||||
|
@ -56,6 +56,10 @@ SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1,
|
||||
22
|
||||
SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20));
|
||||
26
|
||||
SELECT max(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128));
|
||||
0
|
||||
SELECT min(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128));
|
||||
-126
|
||||
SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100;
|
||||
10
|
||||
SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000;
|
||||
|
@ -48,6 +48,9 @@ SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1
|
||||
SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20));
|
||||
SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20));
|
||||
|
||||
SELECT max(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128));
|
||||
SELECT min(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128));
|
||||
|
||||
SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100;
|
||||
SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000;
|
||||
SELECT argMax(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100;
|
||||
|
Loading…
Reference in New Issue
Block a user