Merge pull request #57745 from KevinyhZou/imporve_multi_if_nullable

Improve `MultiIf` function performance when the result type is nullable
This commit is contained in:
Nikolay Degterinsky 2024-01-09 23:17:58 +01:00 committed by GitHub
commit 24733700fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 9 deletions

View File

@ -143,7 +143,6 @@ public:
* depending on values of conditions.
*/
std::vector<Instruction> instructions;
instructions.reserve(arguments.size() / 2 + 1);
@ -238,7 +237,7 @@ public:
}
const auto & settings = context->getSettingsRef();
const WhichDataType which(result_type);
const WhichDataType which(removeNullable(result_type));
bool execute_multiif_columnar
= settings.allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat());
@ -254,8 +253,12 @@ public:
if (which.is##TYPE()) \
{ \
MutableColumnPtr res = ColumnVector<TYPE>::create(rows); \
executeInstructionsColumnar<TYPE, INDEX>(instructions, rows, res); \
return std::move(res); \
MutableColumnPtr null_map = result_type->isNullable() ? ColumnUInt8::create(rows) : nullptr; \
executeInstructionsColumnar<TYPE, INDEX>(instructions, rows, res, null_map, result_type->isNullable()); \
if (!result_type->isNullable()) \
return std::move(res); \
else \
return ColumnNullable::create(std::move(res), std::move(null_map)); \
}
#define ENUMERATE_NUMERIC_TYPES(M, INDEX) \
@ -295,6 +298,7 @@ public:
}
private:
static void executeInstructions(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res)
{
for (size_t i = 0; i < rows; ++i)
@ -374,17 +378,59 @@ private:
}
/// Writes one value per row into `res` (accessed as ColumnVector<T>). For every row the value is taken from
/// the source column of the instruction whose index is stored in `inserts` for that row; `inserts` is
/// filled by calculateInserts (defined elsewhere).
/// T is the element type of the result, S is the integer type used for the per-row instruction indices.
/// When `nullable` is true, `null_map` is accessed as ColumnUInt8 and receives the null flag of each
/// selected value; when it is false, `null_map` is not touched.
/// NOTE(review): this span appears to show removed and added lines together (two signatures, two versions
/// of the non-nullable loop) - confirm against the actual file before relying on it.
template <typename T, typename S>
static void executeInstructionsColumnar(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res)
static void executeInstructionsColumnar(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map, bool nullable)
{
PaddedPODArray<S> inserts(rows, static_cast<S>(instructions.size()));
calculateInserts(instructions, rows, inserts);
PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(*res).getData();
for (size_t row_i = 0; row_i < rows; ++row_i)
if (!nullable)
{
auto & instruction = instructions[inserts[row_i]];
auto ref = instruction.source->getDataAt(row_i);
res_data[row_i] = *reinterpret_cast<const T*>(ref.data);
for (size_t row_i = 0; row_i < rows; ++row_i)
{
auto & instruction = instructions[inserts[row_i]];
auto ref = instruction.source->getDataAt(row_i);
res_data[row_i] = *reinterpret_cast<const T*>(ref.data);
}
}
else
{
PaddedPODArray<UInt8> & null_map_data = assert_cast<ColumnUInt8 &>(*null_map).getData();
/// Raw pointers to the values and to the null flags of every instruction's source,
/// resolved once here so that the row loop below only does pointer arithmetic.
std::vector<const T*> data_cols(instructions.size());
std::vector<const UInt8 *> null_map_cols(instructions.size());
/// All-zero null map of `rows` elements, created on first use and shared by every source that is not nullable.
ColumnPtr shared_null_map_col = nullptr;
for (size_t i = 0; i < instructions.size(); ++i)
{
if (instructions[i].source->isNullable())
{
const ColumnNullable * nullable_col;
if (!instructions[i].source_is_constant)
nullable_col = assert_cast<const ColumnNullable *>(instructions[i].source.get());
else
{
/// A constant source is a ColumnConst; the nullable column is the data column it wraps.
const ColumnPtr data_column = assert_cast<const ColumnConst &>(*instructions[i].source).getDataColumnPtr();
nullable_col = assert_cast<const ColumnNullable *>(data_column.get());
}
null_map_cols[i] = assert_cast<const ColumnUInt8 &>(*nullable_col->getNullMapColumnPtr()).getData().data();
data_cols[i] = assert_cast<const ColumnVector<T> &>(*nullable_col->getNestedColumnPtr()).getData().data();
}
else
{
if (!shared_null_map_col)
{
shared_null_map_col = ColumnUInt8::create(rows, 0);
}
null_map_cols[i] = assert_cast<const ColumnUInt8 &>(*shared_null_map_col).getData().data();
/// NOTE(review): unlike the nullable branch above, the source is cast to ColumnVector<T> without
/// checking source_is_constant. A constant non-nullable source would presumably be a ColumnConst
/// and fail this cast - confirm that callers never pass one when the result is nullable.
data_cols[i] = assert_cast<const ColumnVector<T> &>(*instructions[i].source).getData().data();
}
}
for (size_t row_i = 0; row_i < rows; ++row_i)
{
auto & instruction = instructions[inserts[row_i]];
/// Constant sources are read at element 0, all other sources at the current row.
size_t index = instruction.source_is_constant ? 0 : row_i;
res_data[row_i] = *(data_cols[inserts[row_i]] + index);
null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index);
}
}
}

View File

@ -0,0 +1,8 @@
<test>
<!-- Performance test for multiIf over a Nullable(Int64) column. -->
<!-- The table is a Memory table filled with numbers(300000000); every multiIf branch yields a numeric value. -->
<create_query>CREATE TABLE test_multiif_t(d Nullable(Int64)) ENGINE Memory</create_query>
<fill_query>INSERT INTO test_multiif_t SELECT * from numbers(300000000)</fill_query>
<!-- max_threads=1 pins the measured query to a single thread. -->
<query>select count(1) from test_multiif_t where multiIf(d > 2, d-2, d > 1, d-1, d >0, d, 0) > 1 SETTINGS max_threads=1</query>
<drop_query>DROP TABLE IF EXISTS test_multiif_t</drop_query>
</test>

View File

@ -0,0 +1,5 @@
-1 -1 -1
1 -1 -1
1 1 -1
1 2 \N
1 3 \N

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Functional test: runs multiIf over a Nullable(Int64) column holding numbers(5)
# and prints three result columns (constant branches, an arithmetic branch, a NULL branch).
# NOTE: this sh wrapper is required because of shell_config

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Statements are sent one per client invocation, in this order.
statements=(
    "drop table if exists test_tbl"
    "create table test_tbl (d Nullable(Int64)) engine=Memory"
    "insert into test_tbl select * from numbers(5)"
    "select multiIf(d > 0, 1, -1), multiIf(d > 1, d-1, -1), multiIf(d > 2, null, -1) from test_tbl"
    "drop table test_tbl"
)

for statement in "${statements[@]}"; do
    $CLICKHOUSE_CLIENT -q "$statement"
done