diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp
index 2ff1d3a4435..dc8bee59ccb 100644
--- a/src/AggregateFunctions/AggregateFunctionIf.cpp
+++ b/src/AggregateFunctions/AggregateFunctionIf.cpp
@@ -2,7 +2,6 @@
 #include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
 #include "AggregateFunctionNull.h"
 
-
 namespace DB
 {
 
@@ -46,6 +45,29 @@ public:
     }
 };
 
+/** Given an array of flags, checks if it's all zeros
+  * When the buffer is all zeros, this is slightly faster than doing a memcmp since it doesn't require allocating memory
+  * When the buffer has values, this is much faster since it avoids visiting all the memory (and the allocation and function calls)
+  */
+bool ALWAYS_INLINE is_all_zeros(const UInt8 * flags, size_t size)
+{
+    size_t unroll_size = size - size % 8;
+    size_t i = 0;
+    while (i < unroll_size)
+    {
+        UInt64 v = *reinterpret_cast<const UInt64 *>(&flags[i]);
+        if (v)
+            return false;
+        i += 8;
+    }
+
+    for (; i < size; i++)
+        if (flags[i])
+            return false;
+
+    return true;
+}
+
 /** There are two cases: for single argument and variadic.
   * Code for single argument is much more efficient.
   */
@@ -116,13 +138,29 @@ public:
     {
         const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
         const UInt8 * null_map = column->getNullMapData().data();
-        const IColumn * nested_column = &column->getNestedColumn();
+        const IColumn * columns_param[] = {&column->getNestedColumn()};
+
         const IColumn * filter_column = columns[num_arguments - 1];
         if (const ColumnNullable * nullable_column = typeid_cast<const ColumnNullable *>(filter_column))
-            filter_column = &nullable_column->getNestedColumn();
-        const IColumn * column_param[] = {nested_column, filter_column};
+            filter_column = nullable_column->getNestedColumnPtr().get();
+        if constexpr (result_is_nullable)
+        {
+            /// We need to check if there is work to do as otherwise setting the flag would be a mistake,
+            /// it would mean that the return value would be the default value of the nested type instead of NULL
+            if (is_all_zeros(assert_cast<const ColumnUInt8 *>(filter_column)->getData().data(), batch_size))
+                return;
+        }
+
+        /// Combine the 2 flag arrays so we can call a simplified version (one check vs 2)
+        /// Note that now the null map will contain 0 if not null and not filtered, or 1 for null or filtered (or both)
+        const auto * filter_flags = assert_cast<const ColumnUInt8 *>(filter_column)->getData().data();
+        auto final_nulls = std::make_unique<UInt8[]>(batch_size);
+        for (size_t i = 0; i < batch_size; ++i)
+            final_nulls[i] = null_map[i] | !filter_flags[i];
+
+        this->nested_function->addBatchSinglePlaceNotNull(
+            batch_size, this->nestedPlace(place), columns_param, final_nulls.get(), arena, -1);
 
-        this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), column_param, null_map, arena, 1);
         if constexpr (result_is_nullable)
             if (!memoryIsByte(null_map, batch_size, 1))
                 this->setFlag(place);
diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h
index 2783f2cd43e..24c1e514a71 100644
--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@@ -404,7 +404,7 @@ public:
             const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
             auto final_flags = std::make_unique<UInt8[]>(batch_size);
             for (size_t i = 0; i < batch_size; ++i)
-                final_flags[i] = !null_map[i] && if_flags[i];
+                final_flags[i] = (!null_map[i]) & if_flags[i];
 
             this->data(place).addManyConditional(column.getData().data(), final_flags.get(), batch_size);
         }
diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h
index a06f1d12c0d..cdc6b8f2065 100644
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@@ -190,6 +190,7 @@ public:
         size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const = 0;
 
     /** The same for single place when need to aggregate only filtered data.
+      * Instead of using an if-column, the condition is combined inside the null_map
       */
     virtual void addBatchSinglePlaceNotNull(
         size_t batch_size,
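
Reviewer note (not part of the patch): the change relies on two ideas. First, the null map and the -If filter are folded into a single per-row flag array, so the nested aggregate function performs one check per row instead of two. Second, when the filter column is all zeros there is nothing to aggregate, so the code returns early and the nullable result flag is never set. The standalone sketch below illustrates both ideas in plain C++; the names (is_all_zeros, sum_if_not_null) and the int64 data layout are illustrative assumptions, not ClickHouse APIs.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>

/// Returns true if every flag in the buffer is zero.
/// Reads 8 flags at a time, so a non-zero byte is usually found without visiting the whole buffer.
static bool is_all_zeros(const uint8_t * flags, size_t size)
{
    size_t i = 0;
    for (; i + 8 <= size; i += 8)
    {
        uint64_t word;
        std::memcpy(&word, flags + i, sizeof(word)); /// memcpy avoids an unaligned load
        if (word)
            return false;
    }
    for (; i < size; ++i)
        if (flags[i])
            return false;
    return true;
}

/// Sums values[i] over rows that are not NULL and pass the -If condition.
static int64_t sum_if_not_null(const int64_t * values, const uint8_t * null_map, const uint8_t * if_flags, size_t size)
{
    /// Early exit: nothing selected, so skip the per-row work (in the patch, this also avoids setting the result flag).
    if (is_all_zeros(if_flags, size))
        return 0;

    /// Fold the two flag arrays into one: 1 means "skip this row" (NULL or filtered out).
    auto skip = std::make_unique<uint8_t[]>(size);
    for (size_t i = 0; i < size; ++i)
        skip[i] = null_map[i] | static_cast<uint8_t>(!if_flags[i]); /// bitwise |, no branch per row

    int64_t sum = 0;
    for (size_t i = 0; i < size; ++i)
        if (!skip[i])
            sum += values[i];
    return sum;
}

int main()
{
    std::vector<int64_t> values   = {1, 2, 3, 4, 5};
    std::vector<uint8_t> null_map = {0, 1, 0, 0, 0}; /// row 1 is NULL
    std::vector<uint8_t> if_flags = {1, 1, 0, 1, 0}; /// rows 0, 1 and 3 pass the condition

    std::cout << sum_if_not_null(values.data(), null_map.data(), if_flags.data(), values.size()) << "\n"; /// prints 5 (rows 0 and 3)
}

The same reasoning explains the one-character change in AggregateFunctionSum.h: combining the flags with a bitwise & instead of the short-circuiting && keeps that loop branch-free, presumably so the compiler can vectorize it.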