Merge pull request #72350 from ClickHouse/make_argmax_fastger

Optimize function argMin/Max for some data types
This commit is contained in:
alesapin 2024-11-26 11:04:17 +00:00 committed by GitHub
commit c70bf58d9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -13,18 +13,39 @@ struct Settings;
/// Error codes referenced in this file. These are declarations only;
/// the constants themselves are defined elsewhere.
namespace ErrorCodes
{
    extern const int INCORRECT_DATA;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
}
namespace
{
template <class ValueType>
template <class ResultType, class ValueType>
struct AggregateFunctionArgMinMaxData
{
private:
ResultType result_data;
ValueType value_data;
public:
ResultType & result() { return result_data; }
const ResultType & result() const { return result_data; }
ValueType & value() { return value_data; }
const ValueType & value() const { return value_data; }
AggregateFunctionArgMinMaxData() = default;
explicit AggregateFunctionArgMinMaxData(TypeIndex) {}
static bool allocatesMemoryInArena(TypeIndex)
{
return ResultType::allocatesMemoryInArena() || ValueType::allocatesMemoryInArena();
}
};
template <class ValueType>
struct AggregateFunctionArgMinMaxDataGeneric
{
private:
SingleValueDataBaseMemoryBlock result_data;
ValueType value_data;
@ -35,27 +56,32 @@ public:
ValueType & value() { return value_data; }
const ValueType & value() const { return value_data; }
[[noreturn]] explicit AggregateFunctionArgMinMaxData()
[[noreturn]] AggregateFunctionArgMinMaxDataGeneric()
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionArgMinMaxData initialized empty");
}
explicit AggregateFunctionArgMinMaxData(TypeIndex result_type) : value_data()
explicit AggregateFunctionArgMinMaxDataGeneric(TypeIndex result_type) : value_data()
{
generateSingleValueFromTypeIndex(result_type, result_data);
}
~AggregateFunctionArgMinMaxData() { result().~SingleValueDataBase(); }
static bool allocatesMemoryInArena(TypeIndex result_type_index)
{
return singleValueTypeAllocatesMemoryInArena(result_type_index) || ValueType::allocatesMemoryInArena();
}
~AggregateFunctionArgMinMaxDataGeneric() { result().~SingleValueDataBase(); }
};
static_assert(
sizeof(AggregateFunctionArgMinMaxData<Int8>) <= 2 * SingleValueDataBase::MAX_STORAGE_SIZE,
sizeof(AggregateFunctionArgMinMaxDataGeneric<Int8>) <= 2 * SingleValueDataBase::MAX_STORAGE_SIZE,
"Incorrect size of AggregateFunctionArgMinMaxData struct");
/// Returns the first arg value found for the minimum/maximum value. Example: argMin(arg, value).
template <typename ValueData, bool isMin>
template <typename Data, bool isMin>
class AggregateFunctionArgMinMax final
: public IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>
: public IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data, isMin>>
{
private:
const DataTypePtr & type_val;
@ -63,7 +89,8 @@ private:
const SerializationPtr serialization_val;
const TypeIndex result_type_index;
using Base = IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>;
using Base = IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data, isMin>>;
public:
explicit AggregateFunctionArgMinMax(const DataTypes & argument_types_)
@ -91,7 +118,7 @@ public:
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
new (place) AggregateFunctionArgMinMaxData<ValueData>(result_type_index);
new (place) Data(result_type_index);
}
String getName() const override
@ -215,7 +242,7 @@ public:
bool allocatesMemoryInArena() const override
{
return singleValueTypeAllocatesMemoryInArena(result_type_index) || ValueData::allocatesMemoryInArena();
return Data::allocatesMemoryInArena(result_type_index);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
@ -224,12 +251,125 @@ public:
}
};
template <bool isMin>
AggregateFunctionPtr createAggregateFunctionArgMinMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
template <bool isMin, typename ResultType>
IAggregateFunction * createWithTwoTypesSecond(const DataTypes & argument_types)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionArgMinMax, /* unary */ false, isMin>(
name, argument_types, parameters, settings));
const DataTypePtr & value_type = argument_types[1];
WhichDataType which_value(value_type);
if (which_value.idx == TypeIndex::UInt8)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt8>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::UInt16)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt16>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::UInt32)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::UInt64)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt64>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int8)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int8>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int16)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int16>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int32)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Int64)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Int64>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Float32)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Float32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Float64)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<Float64>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::Date)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt16>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
if (which_value.idx == TypeIndex::DateTime)
{
using Data = AggregateFunctionArgMinMaxData<SingleValueDataFixed<ResultType>, SingleValueDataFixed<UInt32>>;
return new AggregateFunctionArgMinMax<Data, isMin>(argument_types);
}
return nullptr;
}
/// First stage of the two-type dispatch: map the type of argument 0 (the
/// returned `arg`) to a compile-time type and hand over to
/// createWithTwoTypesSecond, which resolves the type of argument 1.
///
/// Returns nullptr when the first argument is not one of the ten numeric types
/// listed below, or when the second stage returns nullptr.
template <bool isMin>
IAggregateFunction * createWithTwoTypes(const DataTypes & argument_types)
{
    const WhichDataType arg_kind(argument_types[0]);

    switch (arg_kind.idx)
    {
        case TypeIndex::UInt8:   return createWithTwoTypesSecond<isMin, UInt8>(argument_types);
        case TypeIndex::UInt16:  return createWithTwoTypesSecond<isMin, UInt16>(argument_types);
        case TypeIndex::UInt32:  return createWithTwoTypesSecond<isMin, UInt32>(argument_types);
        case TypeIndex::UInt64:  return createWithTwoTypesSecond<isMin, UInt64>(argument_types);
        case TypeIndex::Int8:    return createWithTwoTypesSecond<isMin, Int8>(argument_types);
        case TypeIndex::Int16:   return createWithTwoTypesSecond<isMin, Int16>(argument_types);
        case TypeIndex::Int32:   return createWithTwoTypesSecond<isMin, Int32>(argument_types);
        case TypeIndex::Int64:   return createWithTwoTypesSecond<isMin, Int64>(argument_types);
        case TypeIndex::Float32: return createWithTwoTypesSecond<isMin, Float32>(argument_types);
        case TypeIndex::Float64: return createWithTwoTypesSecond<isMin, Float64>(argument_types);
        default:
            return nullptr;
    }
}
/// Factory for argMin (isMin = true) / argMax (isMin = false).
///
/// After checking that exactly two arguments were supplied, it first tries the
/// fully typed implementation from createWithTwoTypes. If that yields nothing,
/// it falls back to the generic state, specialised only on the type of
/// argument 1 (the compared value). The parameters array and settings are unused.
template <bool isMin>
AggregateFunctionPtr createAggregateFunctionArgMinMax(const std::string & name, const DataTypes & argument_types, const Array &, const Settings *)
{
    assertBinary(name, argument_types);

    /// Fast path: both argument types have a dedicated fixed-type state.
    AggregateFunctionPtr specialized(createWithTwoTypes<isMin>(argument_types));
    if (specialized)
        return specialized;

    /// Fallback: result kept in the generic memory block, value typed by argument 1.
    const WhichDataType value_kind(argument_types[1]);

    /// FOR_SINGLE_VALUE_NUMERIC_TYPES is defined elsewhere; presumably it applies
    /// DISPATCH to every supported numeric type name — confirm against its definition.
#define DISPATCH(TYPE) \
    if (value_kind.idx == TypeIndex::TYPE) \
        return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataFixed<TYPE>>, isMin>(argument_types)); /// NOLINT
    FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH

    switch (value_kind.idx)
    {
        case TypeIndex::Date:
            return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataFixed<DataTypeDate::FieldType>>, isMin>(argument_types));
        case TypeIndex::DateTime:
            return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataFixed<DataTypeDateTime::FieldType>>, isMin>(argument_types));
        case TypeIndex::String:
            return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataString>, isMin>(argument_types));
        default:
            /// Any other value type goes through the fully generic single-value state.
            return AggregateFunctionPtr(new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxDataGeneric<SingleValueDataGeneric>, isMin>(argument_types));
    }
}
}