Merge pull request #72350 from ClickHouse/make_argmax_fastger

Optimize function argMin/Max for some data types
This commit is contained in:
alesapin 2024-11-26 11:04:17 +00:00 committed by GitHub
commit c70bf58d9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -13,7 +13,6 @@ struct Settings;
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int INCORRECT_DATA; extern const int INCORRECT_DATA;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
@ -22,9 +21,31 @@ extern const int LOGICAL_ERROR;
namespace namespace
{ {
template <class ValueType> template <class ResultType, class ValueType>
struct AggregateFunctionArgMinMaxData struct AggregateFunctionArgMinMaxData
{ {
private:
ResultType result_data;
ValueType value_data;
public:
ResultType & result() { return result_data; }
const ResultType & result() const { return result_data; }
ValueType & value() { return value_data; }
const ValueType & value() const { return value_data; }
AggregateFunctionArgMinMaxData() = default;
explicit AggregateFunctionArgMinMaxData(TypeIndex) {}
static bool allocatesMemoryInArena(TypeIndex)
{
return ResultType::allocatesMemoryInArena() || ValueType::allocatesMemoryInArena();
}
};
template <class ValueType>
struct AggregateFunctionArgMinMaxDataGeneric
{
private: private:
SingleValueDataBaseMemoryBlock result_data; SingleValueDataBaseMemoryBlock result_data;
ValueType value_data; ValueType value_data;
@ -35,27 +56,32 @@ public:
ValueType & value() { return value_data; } ValueType & value() { return value_data; }
const ValueType & value() const { return value_data; } const ValueType & value() const { return value_data; }
[[noreturn]] explicit AggregateFunctionArgMinMaxData() [[noreturn]] AggregateFunctionArgMinMaxDataGeneric()
{ {
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionArgMinMaxData initialized empty"); throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionArgMinMaxData initialized empty");
} }
explicit AggregateFunctionArgMinMaxData(TypeIndex result_type) : value_data() explicit AggregateFunctionArgMinMaxDataGeneric(TypeIndex result_type) : value_data()
{ {
generateSingleValueFromTypeIndex(result_type, result_data); generateSingleValueFromTypeIndex(result_type, result_data);
} }
~AggregateFunctionArgMinMaxData() { result().~SingleValueDataBase(); } static bool allocatesMemoryInArena(TypeIndex result_type_index)
{
return singleValueTypeAllocatesMemoryInArena(result_type_index) || ValueType::allocatesMemoryInArena();
}
~AggregateFunctionArgMinMaxDataGeneric() { result().~SingleValueDataBase(); }
}; };
static_assert( static_assert(
sizeof(AggregateFunctionArgMinMaxData<Int8>) <= 2 * SingleValueDataBase::MAX_STORAGE_SIZE, sizeof(AggregateFunctionArgMinMaxDataGeneric<Int8>) <= 2 * SingleValueDataBase::MAX_STORAGE_SIZE,
"Incorrect size of AggregateFunctionArgMinMaxData struct"); "Incorrect size of AggregateFunctionArgMinMaxData struct");
/// Returns the first arg value found for the minimum/maximum value. Example: argMin(arg, value). /// Returns the first arg value found for the minimum/maximum value. Example: argMin(arg, value).
template <typename ValueData, bool isMin> template <typename Data, bool isMin>
class AggregateFunctionArgMinMax final class AggregateFunctionArgMinMax final
: public IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>> : public IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data, isMin>>
{ {
private: private:
const DataTypePtr & type_val; const DataTypePtr & type_val;
@ -63,7 +89,8 @@ private:
const SerializationPtr serialization_val; const SerializationPtr serialization_val;
const TypeIndex result_type_index; const TypeIndex result_type_index;
using Base = IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>;
using Base = IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data, isMin>>;
public: public:
explicit AggregateFunctionArgMinMax(const DataTypes & argument_types_) explicit AggregateFunctionArgMinMax(const DataTypes & argument_types_)
@ -91,7 +118,7 @@ public:
void create(AggregateDataPtr __restrict place) const override /// NOLINT void create(AggregateDataPtr __restrict place) const override /// NOLINT
{ {
new (place) AggregateFunctionArgMinMaxData<ValueData>(result_type_index); new (place) Data(result_type_index);
} }
String getName() const override String getName() const override
@ -215,7 +242,7 @@ public:
bool allocatesMemoryInArena() const override bool allocatesMemoryInArena() const override
{ {
return singleValueTypeAllocatesMemoryInArena(result_type_index) || ValueData::allocatesMemoryInArena(); return Data::allocatesMemoryInArena(result_type_index);
} }
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
@ -224,12 +251,125 @@ public:
} }
}; };
template <bool isMin>
AggregateFunctionPtr createAggregateFunctionArgMinMax( template <bool isMin, typename ResultType>
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) IAggregateFunction * createWithTwoTypesSecond(const DataTypes & argument_types)
{ {
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionArgMinMax, /* unary */ false, isMin>( const DataTypePtr & value_type = argument_types[1];
name, argument_types, parameters, settings)); WhichDataType which_value(value_type);
if (which_value.idx == TypeIndex::UInt8)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt8>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::UInt16)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt16>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::UInt32)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::UInt64)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt64>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int8)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int8>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int16)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int16>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int32)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int64)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int64>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Float32)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Float32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Float64)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Float64>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Date)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt16>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::DateTime)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
return nullptr;
}
template <bool isMin>
IAggregateFunction * createWithTwoTypes(const DataTypes & argument_types)
{
const DataTypePtr & result_type = argument_types[0];
WhichDataType which_result(result_type);
if (which_result.idx == TypeIndex::UInt8) return createWithTwoTypesSecond<isMin, UInt8>(argument_types);
if (which_result.idx == TypeIndex::UInt16) return createWithTwoTypesSecond<isMin, UInt16>(argument_types);
if (which_result.idx == TypeIndex::UInt32) return createWithTwoTypesSecond<isMin, UInt32>(argument_types);
if (which_result.idx == TypeIndex::UInt64) return createWithTwoTypesSecond<isMin, UInt64>(argument_types);
if (which_result.idx == TypeIndex::Int8) return createWithTwoTypesSecond<isMin, Int8>(argument_types);
if (which_result.idx == TypeIndex::Int16) return createWithTwoTypesSecond<isMin, Int16>(argument_types);
if (which_result.idx == TypeIndex::Int32) return createWithTwoTypesSecond<isMin, Int32>(argument_types);
if (which_result.idx == TypeIndex::Int64) return createWithTwoTypesSecond<isMin, Int64>(argument_types);
if (which_result.idx == TypeIndex::Float32) return createWithTwoTypesSecond<isMin, Float32>(argument_types);
if (which_result.idx == TypeIndex::Float64) return createWithTwoTypesSecond<isMin, Float64>(argument_types);
return nullptr;
}
template <bool isMin>
AggregateFunctionPtr createAggregateFunctionArgMinMax(const std::string & name, const DataTypes & argument_types, const Array &, const Settings *)
{
assertBinary(name, argument_types);
AggregateFunctionPtr result = AggregateFunctionPtr(createWithTwoTypes<isMin>(argument_types));
if (!result)
{
const DataTypePtr & value_type = argument_types[1];
WhichDataType which(value_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataFixed<TYPE>>, isMin>(argument_types)); /// NOLINT
FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date)
return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataFixed<DataTypeDate::FieldType>>, isMin>(argument_types));
if (which.idx == TypeIndex::DateTime)
return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataFixed<DataTypeDateTime::FieldType>>, isMin>(argument_types));
if (which.idx == TypeIndex::String)
return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataString>, isMin>(argument_types));
return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataGeneric>, isMin>(argument_types));
}
return result;
} }
} }