improve multiIF

This commit is contained in:
kevinyhzou 2023-12-18 12:02:40 +08:00
parent 380d9c996b
commit 3dc8f5ad52
2 changed files with 31 additions and 99 deletions

View File

@ -522,7 +522,6 @@ class IColumn;
M(Bool, parsedatetime_parse_without_leading_zeros, true, "Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros.", 0) \
M(Bool, formatdatetime_format_without_leading_zeros, false, "Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros.", 0) \
\
M(Float, skew_threshold_use_memcpy_execute_multiif_columnar, 0.9f, "The condition skew threshold to use memcpy while execute mulitiif columnar", 0) \
M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \
M(Bool, throw_on_max_partitions_per_insert_block, true, "Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block.", 0) \
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \

View File

@ -253,19 +253,12 @@ public:
if (which.is##TYPE()) \
{ \
MutableColumnPtr res = ColumnVector<TYPE>::create(rows); \
MutableColumnPtr null_map = ColumnUInt8::create(rows); \
executeInstructionsColumnar<TYPE, INDEX>(instructions, rows, res, null_map, result_type->isNullable()); \
if (!result_type->isNullable()) \
{ \
executeInstructionsColumnar<TYPE, INDEX>(instructions, rows, res, instruction_use_memory_copy, \
settings.skew_threshold_use_memcpy_execute_multiif_columnar); \
return std::move(res); \
} \
else \
{ \
MutableColumnPtr null_map = ColumnUInt8::create(rows); \
executeInstructionsColumnarForNullable<TYPE, INDEX>(instructions, rows, res, null_map, instruction_use_memory_copy, \
settings.skew_threshold_use_memcpy_execute_multiif_columnar); \
return ColumnNullable::create(std::move(res), std::move(null_map)); \
} \
}
#define ENUMERATE_NUMERIC_TYPES(M, INDEX) \
@ -305,7 +298,6 @@ public:
}
private:
mutable std::optional<int> instruction_use_memory_copy;
static void executeInstructions(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res)
{
@ -385,94 +377,16 @@ private:
}
}
/// Decides whether one instruction supplies enough of the result rows that the result
/// can be produced by bulk-copying that instruction's source and patching the rest.
///
/// inserts                      - for every row, the index of the instruction the row takes its value from.
/// instructions                 - the instruction list; only `source_is_constant` is read here.
/// instruction_use_memory_copy  - in/out decision. If it already holds a value it is left untouched;
///                                otherwise it is set to the index of a qualifying instruction, or stays
///                                empty when none qualifies.
/// threshold_use_memcpy         - minimal share of rows (rows of the instruction / all rows) to qualify.
///
/// Constant sources never qualify. If several instructions qualify (possible only for thresholds
/// of 0.5 or below), the one with the highest index is kept, as before.
template<typename S>
static void calculateWhetherUseMemoryCopy(PaddedPODArray<S> & inserts, std::vector<Instruction> & instructions, std::optional<int> & instruction_use_memory_copy,
    double threshold_use_memcpy)
{
    /// A decision taken by an earlier call is reused as is.
    if (instruction_use_memory_copy.has_value())
        return;

    const size_t instruction_count = instructions.size();
    const size_t row_count = inserts.size();

    /// Nothing to measure: without rows no instruction can reach the threshold
    /// (and the ratio below would be 0/0).
    if (row_count == 0)
        return;

    std::vector<UInt64> rows_per_instruction(instruction_count);
    for (size_t row = 0; row < row_count; ++row)
    {
        const size_t target = static_cast<size_t>(inserts[row]);

        /// Callers pre-fill `inserts` with instructions.size() as a "not assigned" marker.
        /// Such a value has no counter slot, so it must not be used as an index.
        if (target < instruction_count)
            ++rows_per_instruction[target];
    }

    for (size_t i = 0; i < instruction_count; ++i)
    {
        if (instructions[i].source_is_constant)
            continue;

        const double share = static_cast<double>(rows_per_instruction[i]) / static_cast<double>(row_count);
        if (share >= threshold_use_memcpy)
            instruction_use_memory_copy.emplace(static_cast<int>(i));
    }
}
/// NOTE(review): two function headers appear back to back here (executeInstructionsColumnarForNullable
/// and executeInstructionsColumnar) followed by a single body. The body below reads
/// `instruction_use_memory_copy` and `threshold_use_memcpy`, which are parameters of the first header only.
///
/// The body fills `res` (values of type T) and `null_map` (UInt8 flags) row by row from the sources
/// of `instructions`, optionally bulk-copying one source first when a memcpy instruction was selected.
template<typename T, typename S>
static void executeInstructionsColumnarForNullable(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res,
const MutableColumnPtr & null_map, std::optional<int> & instruction_use_memory_copy, double threshold_use_memcpy)
template <typename T, typename S>
static void executeInstructionsColumnar(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map,
bool nullable)
{
/// Every row starts as instructions.size(); calculateInserts (defined elsewhere) is then presumably
/// expected to store, per row, the index of the instruction that provides the value - TODO confirm.
PaddedPODArray<S> inserts(rows, static_cast<S>(instructions.size()));
calculateInserts(instructions, rows, inserts);
PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(*res).getData();
PaddedPODArray<UInt8> & null_map_data = assert_cast<ColumnUInt8 &>(*null_map).getData();
/// Raw pointers to the value buffer and the null-map buffer of every instruction's source.
std::vector<const T*> data_cols(instructions.size());
std::vector<const UInt8 *> null_map_cols(instructions.size());
/// May set `instruction_use_memory_copy`; it is left unchanged if it already holds a value.
calculateWhetherUseMemoryCopy(inserts, instructions, instruction_use_memory_copy, threshold_use_memcpy);
for (size_t i = 0; i < instructions.size(); ++i)
{
if (instructions[i].source->isNullable())
{
const ColumnNullable * nullable_col;
if (!instructions[i].source_is_constant)
nullable_col = assert_cast<const ColumnNullable *>(instructions[i].source.get());
else
{
/// A constant source is unwrapped first: the nullable column is the data column of the ColumnConst.
const ColumnPtr data_column = assert_cast<const ColumnConst &>(*instructions[i].source).getDataColumnPtr();
nullable_col = assert_cast<const ColumnNullable *>(data_column.get());
}
null_map_cols[i] = assert_cast<const ColumnUInt8 &>(*nullable_col->getNullMapColumnPtr()).getData().data();
data_cols[i] = assert_cast<const ColumnVector<T> &>(*nullable_col->getNestedColumnPtr()).getData().data();
}
else
{
/// NOTE(review): the column created on the next line is a temporary that nothing here keeps alive,
/// so the stored pointer looks dangling once the statement ends - confirm and keep the column
/// in a local that outlives the loops below. Its contents are also never written here; presumably
/// they are meant to be all zeros ("not null") - verify that create(rows) guarantees that.
null_map_cols[i] = ColumnUInt8::create(rows)->getData().data();
data_cols[i] = assert_cast<const ColumnVector<T> &>(*instructions[i].source).getData().data();
}
}
if (!instruction_use_memory_copy.has_value())
{
/// No memcpy instruction selected: gather every row individually.
for (size_t row_i = 0; row_i < rows; ++row_i)
{
auto & instruction = instructions[inserts[row_i]];
/// A constant source is always read at position 0, any other source at the current row.
size_t index = instruction.source_is_constant ? 0 : row_i;
res_data[row_i] = *(data_cols[inserts[row_i]] + index);
null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index);
}
}
else
{
/// Bulk-copy `rows` values and `rows` null flags from the selected instruction,
/// then overwrite only the rows that take their value from a different instruction.
/// NOTE(review): the selection may come from an earlier call (it is only computed when empty),
/// so this assumes the selected source still has at least `rows` elements - confirm.
int val = instruction_use_memory_copy.value();
memcpy(res_data.data(), data_cols[val], sizeof(T) * rows);
memcpy(null_map_data.data(), null_map_cols[val], sizeof(UInt8) * rows);
for (size_t row_i = 0; row_i < rows; ++row_i)
{
/// Rows served by the bulk-copied instruction are already in place.
if (inserts[row_i] == val)
continue;
auto & instruction = instructions[inserts[row_i]];
size_t index = instruction.source_is_constant ? 0 : row_i;
res_data[row_i] = *(data_cols[inserts[row_i]] + index);
null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index);
}
}
}
template <typename T, typename S>
static void executeInstructionsColumnar(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res,
std::optional<int> & instruction_use_memory_copy, double threshold_use_memcpy)
{
PaddedPODArray<S> inserts(rows, static_cast<S>(instructions.size()));
calculateInserts(instructions, rows, inserts);
PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(*res).getData();
calculateWhetherUseMemoryCopy(inserts, instructions, instruction_use_memory_copy, threshold_use_memcpy);
if (!instruction_use_memory_copy.has_value())
if (!nullable)
{
for (size_t row_i = 0; row_i < rows; ++row_i)
{
@ -484,15 +398,34 @@ private:
else
{
std::vector<const T*> data_cols(instructions.size());
std::vector<const UInt8 *> null_map_cols(instructions.size());
for (size_t i = 0; i < instructions.size(); ++i)
data_cols[i]= assert_cast<const ColumnVector<T> &>(*instructions[i].source).getData().data();
int val = instruction_use_memory_copy.value();
memcpy(res_data.data(), data_cols[val], sizeof(T) * rows);
{
if (instructions[i].source->isNullable())
{
const ColumnNullable * nullable_col;
if (!instructions[i].source_is_constant)
nullable_col = assert_cast<const ColumnNullable *>(instructions[i].source.get());
else
{
const ColumnPtr data_column = assert_cast<const ColumnConst &>(*instructions[i].source).getDataColumnPtr();
nullable_col = assert_cast<const ColumnNullable *>(data_column.get());
}
null_map_cols[i] = assert_cast<const ColumnUInt8 &>(*nullable_col->getNullMapColumnPtr()).getData().data();
data_cols[i] = assert_cast<const ColumnVector<T> &>(*nullable_col->getNestedColumnPtr()).getData().data();
}
else
{
null_map_cols[i] = ColumnUInt8::create(rows)->getData().data();
data_cols[i] = assert_cast<const ColumnVector<T> &>(*instructions[i].source).getData().data();
}
}
for (size_t row_i = 0; row_i < rows; ++row_i)
{
if (inserts[row_i] == val)
continue;
res_data[row_i] = *(data_cols[inserts[row_i]] + row_i);
auto & instruction = instructions[inserts[row_i]];
size_t index = instruction.source_is_constant ? 0 : row_i;
res_data[row_i] = *(data_cols[inserts[row_i]] + index);
null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index);
}
}
}