Introduce one template level to argMin to recover some perf

This commit is contained in:
Raúl Marín 2024-01-22 17:31:34 +01:00
parent 1e00dec997
commit e1bd77fa31
11 changed files with 97 additions and 94 deletions

View File

@ -24,8 +24,9 @@ private:
SerializationPtr serialization;
public:
explicit AggregateFunctionAny(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAny<Data>>({type}, {}, type), serialization(type->getDefaultSerialization())
explicit AggregateFunctionAny(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAny<Data>>(argument_types_, {}, argument_types_[0])
, serialization(this->result_type->getDefaultSerialization())
{
}
@ -179,7 +180,8 @@ public:
AggregateFunctionPtr
createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionAny>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(
createAggregateFunctionSingleValue<AggregateFunctionAny, /* unary */ true>(name, argument_types, parameters, settings));
}
@ -190,9 +192,9 @@ private:
SerializationPtr serialization;
public:
explicit AggregateFunctionAnyLast(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyLast<Data>>({type}, {}, type)
, serialization(type->getDefaultSerialization())
explicit AggregateFunctionAnyLast(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyLast<Data>>(argument_types_, {}, argument_types_[0])
, serialization(this->result_type->getDefaultSerialization())
{
}
@ -348,7 +350,8 @@ public:
AggregateFunctionPtr createAggregateFunctionAnyLast(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionAnyLast>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(
createAggregateFunctionSingleValue<AggregateFunctionAnyLast, /* unary */ true>(name, argument_types, parameters, settings));
}
}

View File

@ -27,7 +27,7 @@ struct AggregateFunctionAnyHeavyData
using Self = AggregateFunctionAnyHeavyData;
private:
SingleValueDataBase::memory_block v_data;
SingleValueDataBaseMemoryBlock v_data;
UInt64 counter = 0;
public:

View File

@ -28,7 +28,7 @@ struct AggregateFunctionSingleValueOrNullData
using Self = AggregateFunctionSingleValueOrNullData;
private:
SingleValueDataBase::memory_block v_data;
SingleValueDataBaseMemoryBlock v_data;
bool first_value = true;
bool is_null = false;

View File

@ -22,63 +22,56 @@ extern const int LOGICAL_ERROR;
namespace
{
template <class ValueType>
struct AggregateFunctionArgMinMaxData
{
private:
SingleValueDataBase::memory_block r_data;
SingleValueDataBase::memory_block v_data;
SingleValueDataBaseMemoryBlock result_data;
ValueType value_data;
public:
SingleValueDataBase & result() { return r_data.get(); }
const SingleValueDataBase & result() const { return r_data.get(); }
SingleValueDataBase & value() { return v_data.get(); }
const SingleValueDataBase & value() const { return v_data.get(); }
SingleValueDataBase & result() { return result_data.get(); }
const SingleValueDataBase & result() const { return result_data.get(); }
ValueType & value() { return value_data; }
const ValueType & value() const { return value_data; }
[[noreturn]] explicit AggregateFunctionArgMinMaxData()
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionArgMinMaxData initialized empty");
}
explicit AggregateFunctionArgMinMaxData(TypeIndex result_type, TypeIndex value_type)
explicit AggregateFunctionArgMinMaxData(TypeIndex result_type) : value_data()
{
generateSingleValueFromTypeIndex(result_type, r_data);
generateSingleValueFromTypeIndex(value_type, v_data);
generateSingleValueFromTypeIndex(result_type, result_data);
}
~AggregateFunctionArgMinMaxData()
{
result().~SingleValueDataBase();
value().~SingleValueDataBase();
}
~AggregateFunctionArgMinMaxData() { result().~SingleValueDataBase(); }
};
static_assert(
sizeof(AggregateFunctionArgMinMaxData) == 2 * SingleValueDataBase::MAX_STORAGE_SIZE,
sizeof(AggregateFunctionArgMinMaxData<Int8>) <= 2 * SingleValueDataBase::MAX_STORAGE_SIZE,
"Incorrect size of AggregateFunctionArgMinMaxData struct");
/// Returns the first arg value found for the minimum/maximum value. Example: argMin(arg, value).
template <bool isMin>
template <typename ValueData, bool isMin>
class AggregateFunctionArgMinMax final
: public IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData, AggregateFunctionArgMinMax<isMin>>
: public IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>
{
private:
const DataTypePtr & type_val;
const SerializationPtr serialization_res;
const SerializationPtr serialization_val;
const TypeIndex result_type_index;
const TypeIndex value_type_index;
using Base = IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData, AggregateFunctionArgMinMax<isMin>>;
using Base = IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>;
public:
AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_)
: Base({type_res_, type_val_}, {}, type_res_)
AggregateFunctionArgMinMax(const DataTypes & argument_types_)
: Base(argument_types_, {}, argument_types_[0])
, type_val(this->argument_types[1])
, serialization_res(type_res_->getDefaultSerialization())
, serialization_val(type_val->getDefaultSerialization())
, result_type_index(WhichDataType(type_res_).idx)
, value_type_index(WhichDataType(type_val_).idx)
, serialization_res(this->argument_types[0]->getDefaultSerialization())
, serialization_val(this->argument_types[1]->getDefaultSerialization())
, result_type_index(WhichDataType(this->argument_types[0]).idx)
{
if (!type_val->isComparable())
throw Exception(
@ -90,7 +83,7 @@ public:
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
new (place) AggregateFunctionArgMinMaxData(result_type_index, value_type_index);
new (place) AggregateFunctionArgMinMaxData<ValueData>(result_type_index);
}
String getName() const override
@ -214,7 +207,7 @@ public:
bool allocatesMemoryInArena() const override
{
return singleValueTypeAllocatesMemoryInArena(result_type_index) || singleValueTypeAllocatesMemoryInArena(value_type_index);
return singleValueTypeAllocatesMemoryInArena(result_type_index) || ValueData::allocatesMemoryInArena();
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
@ -224,16 +217,11 @@ public:
};
template <bool isMin>
AggregateFunctionPtr
createAggregateFunctionArgMinMax(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
AggregateFunctionPtr createAggregateFunctionArgMinMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertBinary(name, argument_types);
const DataTypePtr & res_type = argument_types[0];
const DataTypePtr & val_type = argument_types[1];
return AggregateFunctionPtr(new AggregateFunctionArgMinMax<isMin>(res_type, val_type));
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionArgMinMax, /* unary */ false, isMin>(
name, argument_types, parameters, settings));
}
}

View File

@ -25,15 +25,15 @@ private:
SerializationPtr serialization;
public:
explicit AggregateFunctionMinMax(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionMinMax<Data, isMin>>({type}, {}, type)
, serialization(type->getDefaultSerialization())
explicit AggregateFunctionMinMax(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionMinMax<Data, isMin>>(argument_types_, {}, argument_types_[0])
, serialization(this->result_type->getDefaultSerialization())
{
if (!type->isComparable())
if (!this->result_type->isComparable())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of aggregate function {} because the values of that data type are not comparable",
type->getName(),
this->result_type->getName(),
getName());
}
@ -189,7 +189,7 @@ AggregateFunctionPtr createAggregateFunctionMinMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(
createAggregateFunctionSingleValue<AggregateFunctionMinMax, isMin>(name, argument_types, parameters, settings));
createAggregateFunctionSingleValue<AggregateFunctionMinMax, /* unary */ true, isMin>(name, argument_types, parameters, settings));
}
}

View File

@ -16,7 +16,7 @@ namespace
struct AggregateFunctionCombinatorArgMinArgMaxData
{
private:
SingleValueDataBase::memory_block v_data;
SingleValueDataBaseMemoryBlock v_data;
public:
explicit AggregateFunctionCombinatorArgMinArgMaxData(TypeIndex value_type) { generateSingleValueFromTypeIndex(value_type, v_data); }
@ -93,7 +93,7 @@ public:
size_t sizeOfData() const override { return key_offset + sizeof(Key); }
size_t alignOfData() const override { return std::max(nested_function->alignOfData(), alignof(SingleValueDataBase::memory_block)); }
size_t alignOfData() const override { return std::max(nested_function->alignOfData(), alignof(SingleValueDataBaseMemoryBlock)); }
void create(AggregateDataPtr __restrict place) const override
{

View File

@ -1380,13 +1380,14 @@ bool SingleValueDataGeneric::setIfGreater(const SingleValueDataBase & other, Are
return false;
}
void generateSingleValueFromTypeIndex(TypeIndex idx, SingleValueDataBase::memory_block & data)
void generateSingleValueFromTypeIndex(TypeIndex idx, SingleValueDataBaseMemoryBlock & data)
{
#define DISPATCH(TYPE) \
if (idx == TypeIndex::TYPE) \
{ \
static_assert(sizeof(SingleValueDataNumeric<TYPE>) <= SingleValueDataBase::MAX_STORAGE_SIZE); \
new (data.memory) SingleValueDataNumeric<TYPE>; \
static_assert(sizeof(SingleValueDataNumeric<TYPE>) <= sizeof(SingleValueDataBaseMemoryBlock::memory)); \
static_assert(alignof(SingleValueDataNumeric<TYPE>) <= alignof(SingleValueDataBaseMemoryBlock)); \
new (&data.memory) SingleValueDataNumeric<TYPE>; \
return; \
}
@ -1395,24 +1396,28 @@ void generateSingleValueFromTypeIndex(TypeIndex idx, SingleValueDataBase::memory
if (idx == TypeIndex::Date)
{
static_assert(sizeof(SingleValueDataFixed<DataTypeDate::FieldType>) <= SingleValueDataBase::MAX_STORAGE_SIZE);
new (data.memory) SingleValueDataNumeric<DataTypeDate::FieldType>;
static_assert(sizeof(SingleValueDataNumeric<DataTypeDate::FieldType>) <= sizeof(SingleValueDataBaseMemoryBlock::memory));
static_assert(alignof(SingleValueDataNumeric<DataTypeDate::FieldType>) <= alignof(SingleValueDataBaseMemoryBlock));
new (&data.memory) SingleValueDataNumeric<DataTypeDate::FieldType>;
return;
}
if (idx == TypeIndex::DateTime)
{
static_assert(sizeof(SingleValueDataFixed<DataTypeDateTime::FieldType>) <= SingleValueDataBase::MAX_STORAGE_SIZE);
new (data.memory) SingleValueDataNumeric<DataTypeDateTime::FieldType>;
static_assert(sizeof(SingleValueDataNumeric<DataTypeDateTime::FieldType>) <= sizeof(SingleValueDataBaseMemoryBlock::memory));
static_assert(alignof(SingleValueDataNumeric<DataTypeDateTime::FieldType>) <= alignof(SingleValueDataBaseMemoryBlock));
new (&data.memory) SingleValueDataNumeric<DataTypeDateTime::FieldType>;
return;
}
if (idx == TypeIndex::String)
{
static_assert(sizeof(SingleValueDataString) <= SingleValueDataBase::MAX_STORAGE_SIZE);
new (data.memory) SingleValueDataString;
static_assert(sizeof(SingleValueDataString) <= sizeof(SingleValueDataBaseMemoryBlock::memory));
static_assert(alignof(SingleValueDataString) <= alignof(SingleValueDataBaseMemoryBlock));
new (&data.memory) SingleValueDataString;
return;
}
static_assert(sizeof(SingleValueDataGeneric) <= SingleValueDataBase::MAX_STORAGE_SIZE);
new (data.memory) SingleValueDataGeneric;
static_assert(sizeof(SingleValueDataGeneric) <= sizeof(SingleValueDataBaseMemoryBlock::memory));
static_assert(alignof(SingleValueDataGeneric) <= alignof(SingleValueDataBaseMemoryBlock));
new (&data.memory) SingleValueDataGeneric;
}
bool singleValueTypeAllocatesMemoryInArena(TypeIndex idx)

View File

@ -21,19 +21,9 @@ struct SingleValueDataBase
static constexpr int nan_direction_hint = 1;
/// Any subclass (numeric, string, generic) must fit in MAX_STORAGE_SIZE bytes (sizeof <= MAX_STORAGE_SIZE)
/// We use this knowledge to create composite data classes that use them directly by reserving a SingleValueDataBaseMemoryBlock
/// For example argMin holds 2 of these objects
/// For example argMin holds 1 of these (for the result), while keeping a template for the value
static constexpr UInt32 MAX_STORAGE_SIZE = 64;
/// Helper to allocate enough memory to store any derived class and subclasses will be misaligned
/// alignas is necessary as otherwise alignof(memory_block) == 1
struct alignas(MAX_STORAGE_SIZE) memory_block
{
char memory[SingleValueDataBase::MAX_STORAGE_SIZE];
SingleValueDataBase & get() { return *reinterpret_cast<SingleValueDataBase *>(memory); }
const SingleValueDataBase & get() const { return *reinterpret_cast<const SingleValueDataBase *>(memory); }
};
static_assert(alignof(memory_block) == SingleValueDataBase::MAX_STORAGE_SIZE);
virtual ~SingleValueDataBase() { }
virtual bool has() const = 0;
virtual void insertResultInto(IColumn &) const = 0;
@ -353,37 +343,52 @@ public:
static_assert(sizeof(SingleValueDataGeneric) <= SingleValueDataBase::MAX_STORAGE_SIZE, "Incorrect size of SingleValueDataGeneric struct");
/// min, max, any, anyLast, anyHeavy, etc...
template <template <typename, bool...> class AggregateFunctionTemplate, bool... isMin>
template <template <typename, bool...> class AggregateFunctionTemplate, bool unary, bool... isMin>
static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
if constexpr (unary)
assertUnary(name, argument_types);
else
assertBinary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
const DataTypePtr & value_type = unary ? argument_types[0] : argument_types[1];
WhichDataType which(value_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return new AggregateFunctionTemplate<SingleValueDataFixed<TYPE>, isMin...>(argument_type); /// NOLINT
return new AggregateFunctionTemplate<SingleValueDataFixed<TYPE>, isMin...>(argument_types); /// NOLINT
FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date)
return new AggregateFunctionTemplate<SingleValueDataFixed<DataTypeDate::FieldType>, isMin...>(argument_type);
return new AggregateFunctionTemplate<SingleValueDataFixed<DataTypeDate::FieldType>, isMin...>(argument_types);
if (which.idx == TypeIndex::DateTime)
return new AggregateFunctionTemplate<SingleValueDataFixed<DataTypeDateTime::FieldType>, isMin...>(argument_type);
return new AggregateFunctionTemplate<SingleValueDataFixed<DataTypeDateTime::FieldType>, isMin...>(argument_types);
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<SingleValueDataString, isMin...>(argument_type);
return new AggregateFunctionTemplate<SingleValueDataString, isMin...>(argument_types);
return new AggregateFunctionTemplate<SingleValueDataGeneric, isMin...>(argument_type);
return new AggregateFunctionTemplate<SingleValueDataGeneric, isMin...>(argument_types);
}
/// Helper when you want to build SingleValueNumeric
void generateSingleValueNumericFromTypeIndex(TypeIndex idx, SingleValueDataBase::memory_block & data);
/// Helper to allocate enough memory to store any derived class
///
/// This is raw storage only: the struct declares no constructor or destructor of its own.
/// An object derived from SingleValueDataBase has to be placement-constructed into `memory`
/// (generateSingleValueFromTypeIndex does this) before get() may be used, and the owner of the
/// block is expected to call ~SingleValueDataBase() on it explicitly when it is done.
struct SingleValueDataBaseMemoryBlock
{
/// Storage of at least MAX_STORAGE_SIZE bytes, aligned for the strictest of the listed types.
std::aligned_union_t<
SingleValueDataBase::MAX_STORAGE_SIZE,
SingleValueDataNumeric<Decimal256>, /// We check all types in generateSingleValueFromTypeIndex
SingleValueDataString,
SingleValueDataGeneric>
memory;
/// Reinterprets the storage as the polymorphic base class.
/// Only meaningful once an object has been constructed in `memory`.
SingleValueDataBase & get() { return *reinterpret_cast<SingleValueDataBase *>(&memory); }
/// Const overload of get(); same precondition.
const SingleValueDataBase & get() const { return *reinterpret_cast<const SingleValueDataBase *>(&memory); }
};
static_assert(alignof(SingleValueDataBaseMemoryBlock) == 8);
/// For Data classes that want to compose on top of SingleValueDataBase values, like argMax or singleValueOrNull
/// It will build the object based on the type idx on the memory block provided
void generateSingleValueFromTypeIndex(TypeIndex idx, SingleValueDataBase::memory_block & data);
void generateSingleValueFromTypeIndex(TypeIndex idx, SingleValueDataBaseMemoryBlock & data);
bool singleValueTypeAllocatesMemoryInArena(TypeIndex idx);
}

View File

@ -1,6 +1,6 @@
UInt64 1 8
UInt64 10 80
UInt64 1000 8000
AggregateFunction(argMax, String, DateTime) 1 136
AggregateFunction(argMax, String, DateTime) 10 1360
AggregateFunction(argMax, String, DateTime) 1000 136000
AggregateFunction(argMax, String, DateTime) 1 80
AggregateFunction(argMax, String, DateTime) 10 800
AggregateFunction(argMax, String, DateTime) 1000 80000

View File

@ -42,8 +42,10 @@ ORDER BY event_time_microseconds;
-- 1 * 8 + AggregateFunction(argMax, String, DateTime)
--
-- Size of AggregateFunction(argMax, String, DateTime):
-- 2 * MAX(sizeOf(SingleValueDataFixed<T>), sizeOf(SingleValueDataString), sizeOf(SingleValueDataGeneric))
-- Which is the same as 2 * SingleValueDataBase::MAX_STORAGE_SIZE (64)
-- 1 Base class + 1 specific/value class:
-- Base class: MAX(sizeOf(SingleValueDataFixed<T>), sizeOf(SingleValueDataString), sizeOf(SingleValueDataGeneric)) = 64
-- Specific class: SingleValueDataFixed<DateTime> = 4 + 1. With padding = 8
-- Total: 8 + 64 + 8 = 80
--
-- ColumnAggregateFunction total: 8 + 64 + 8 = 80
SELECT 'AggregateFunction(argMax, String, DateTime)',

View File

@ -1,4 +1,4 @@
-- When we use SingleValueDataBase::memory_block we must ensure we call the class destructor on destroy
-- When we use SingleValueDataBaseMemoryBlock we must ensure we call the class destructor on destroy
Select argMax((number, number), (number, number)) FROM numbers(100000) format Null;
Select argMin((number, number), (number, number)) FROM numbers(100000) format Null;