Add default implementation for Nothing, support arrays of nullable for arrayFilter and similar functions

This commit is contained in:
avogar 2022-05-12 15:15:31 +00:00
parent 246aafa58a
commit 0311dbb422
25 changed files with 232 additions and 24 deletions

View File

@ -16,7 +16,7 @@ class SerializationNothing : public SimpleTextSerialization
private:
/// Reports that serialization is not available for type Nothing.
/// Always throws Exception with code NOT_IMPLEMENTED; never returns.
[[noreturn]] static void throwNoSerialization()
{
    /// A single throw with the type-specific message: a second throw after it would be unreachable.
    throw Exception("Serialization is not implemented for type Nothing", ErrorCodes::NOT_IMPLEMENTED);
}
public:
void serializeBinary(const Field &, WriteBuffer &) const override { throwNoSerialization(); }

View File

@ -98,6 +98,7 @@ protected:
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
private:

View File

@ -2509,6 +2509,7 @@ protected:
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

View File

@ -50,7 +50,8 @@ public:
return expr_columns.getByName(signature->return_name).column;
}
/// Returns false: the default implementation for Nulls is not used for this function.
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
private:
ExpressionActionsPtr expression_actions;
@ -118,6 +119,7 @@ public:
String getName() const override { return "FunctionCapture"; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
@ -247,6 +249,7 @@ public:
String getName() const override { return name; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override { return return_type; }
size_t getNumberOfArguments() const override { return capture->captured_types.size(); }

View File

@ -9,6 +9,7 @@
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnNothing.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/Native.h>
@ -203,6 +204,31 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
return nullptr;
}
/// Default handling of arguments with type Nothing.
///
/// Returns nullptr (meaning "not handled here") when useDefaultImplementationForNothing() is false
/// or when none of the arguments has type Nothing.
/// Otherwise returns a ColumnConst built over a one-element ColumnNothing and sized by input_rows_count.
/// Throws LOGICAL_ERROR if an argument has type Nothing but result_type is not Nothing.
ColumnPtr IExecutableFunction::defaultImplementationForNothing(
    const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const
{
    if (!useDefaultImplementationForNothing())
        return nullptr;

    /// Look for at least one argument of type Nothing.
    bool has_nothing_argument = false;
    for (const auto & argument : args)
    {
        if (isNothing(argument.type))
        {
            has_nothing_argument = true;
            break;
        }
    }

    if (!has_nothing_argument)
        return nullptr;

    if (isNothing(result_type))
        return ColumnConst::create(ColumnNothing::create(1), input_rows_count);

    /// The result type is expected to be Nothing as well; anything else is a logic error.
    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "Function {} with argument with type Nothing and default implementation for Nothing "
        "is expected to return result with type Nothing, got {}",
        getName(),
        result_type->getName());
}
ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns(
const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
@ -212,6 +238,9 @@ ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns(
if (auto res = defaultImplementationForNulls(args, result_type, input_rows_count, dry_run))
return res;
if (auto res = defaultImplementationForNothing(args, result_type, input_rows_count))
return res;
ColumnPtr res;
if (dry_run)
res = executeDryRunImpl(args, result_type, input_rows_count);
@ -275,11 +304,6 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
/// Result type Nothing means that we don't need to execute function at all.
/// Example: select arrayMap(x -> 2 * x, []);
if (isNothing(result_type))
return result_type->createColumn();
if (useDefaultImplementationForSparseColumns())
{
size_t num_sparse_columns = 0;
@ -435,13 +459,13 @@ DataTypePtr IFunctionOverloadResolver::getReturnTypeWithoutLowCardinality(const
}
}
/// If one of the arguments is Nothing, then we won't really execute
/// the function and the result type should be also Nothing.
/// Example: select arrayMap(x -> 2 * x, []);
for (const auto & arg : arguments)
if (!arguments.empty() && useDefaultImplementationForNothing())
{
if (isNothing(arg.type))
return std::make_shared<DataTypeNothing>();
for (const auto & arg : arguments)
{
if (isNothing(arg.type))
return std::make_shared<DataTypeNothing>();
}
}
return getReturnTypeImpl(arguments);

View File

@ -63,6 +63,11 @@ protected:
*/
virtual bool useDefaultImplementationForNulls() const { return true; }
/** Whether the default implementation for arguments of type Nothing is used.
  * When it is enabled and at least one argument has type Nothing, a constant column
  * with type Nothing is returned as the result.
  * Enabled by default; override to return false to disable it.
  */
virtual bool useDefaultImplementationForNothing() const { return true; }
/** If the function have non-zero number of arguments,
* and if all arguments are constant, that we could automatically provide default implementation:
* arguments are converted to ordinary columns with single value, then function is executed as usual,
@ -100,6 +105,9 @@ private:
ColumnPtr defaultImplementationForNulls(
const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
ColumnPtr defaultImplementationForNothing(
const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const;
ColumnPtr executeWithoutLowCardinalityColumns(
const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
@ -166,8 +174,8 @@ public:
/** If function isSuitableForConstantFolding then this method will be called during query analysis
* if some arguments are constants. For example, logical functions (AndFunction, OrFunction) can
* return their result based on some constant arguments.
* Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForConstants,
* useDefaultImplementationForLowCardinality are not applied.
* Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForNothing,
* useDefaultImplementationForConstants, useDefaultImplementationForLowCardinality are not applied.
*/
virtual ColumnPtr getConstantResultForNonConstArguments(
const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const { return nullptr; }
@ -354,7 +362,13 @@ protected:
*/
virtual bool useDefaultImplementationForNulls() const { return true; }
/** If useDefaultImplementationForNulls() is true, then change arguments for getReturnType() and build().
/** If useDefaultImplementationForNothing() is true, then change arguments for getReturnType() and build():
* if some of arguments are Nothing then don't call getReturnType(), call build() with return_type = Nothing,
* Otherwise build returns build(arguments, getReturnType(arguments));
*/
virtual bool useDefaultImplementationForNothing() const { return true; }
/** If useDefaultImplementationForLowCardinalityColumns() is true, then change arguments for getReturnType() and build().
* If function arguments has low cardinality types, convert them to ordinary types.
* getReturnType returns ColumnLowCardinality if at least one argument type is ColumnLowCardinality.
*/
@ -403,6 +417,11 @@ public:
*/
virtual bool useDefaultImplementationForNulls() const { return true; }
/** Whether arguments of type Nothing get the default treatment.
  * If true and some of the arguments have type Nothing, the default implementation
  * returns a constant column with type Nothing.
  * Returns true unless overridden.
  */
virtual bool useDefaultImplementationForNothing() const { return true; }
/** If the function have non-zero number of arguments,
* and if all arguments are constant, that we could automatically provide default implementation:
* arguments are converted to ordinary columns with single value, then function is executed as usual,

View File

@ -27,6 +27,7 @@ protected:
}
bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); }
bool useDefaultImplementationForNothing() const final { return function->useDefaultImplementationForNothing(); }
bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); }
bool useDefaultImplementationForLowCardinalityColumns() const final { return function->useDefaultImplementationForLowCardinalityColumns(); }
bool useDefaultImplementationForSparseColumns() const final { return function->useDefaultImplementationForSparseColumns(); }
@ -124,6 +125,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { return function->getReturnTypeImpl(arguments); }
bool useDefaultImplementationForNulls() const override { return function->useDefaultImplementationForNulls(); }
bool useDefaultImplementationForNothing() const override { return function->useDefaultImplementationForNothing(); }
bool useDefaultImplementationForLowCardinalityColumns() const override { return function->useDefaultImplementationForLowCardinalityColumns(); }
bool useDefaultImplementationForSparseColumns() const override { return function->useDefaultImplementationForSparseColumns(); }
bool canBeExecutedOnLowCardinalityDictionary() const override { return function->canBeExecutedOnLowCardinalityDictionary(); }

View File

@ -6,6 +6,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/IColumn.h>
#include <Common/Exception.h>
@ -16,11 +17,13 @@
#include <DataTypes/DataTypeFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/castColumn.h>
#include <IO/WriteHelpers.h>
@ -156,7 +159,7 @@ public:
DataTypePtr nested_type = data_type->getNestedType();
if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8())
if (Impl::needBoolean() && !isUInt8(nested_type))
throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found "
+ arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -180,8 +183,14 @@ public:
/// The types of the remaining arguments are already checked in getLambdaArgumentTypes.
DataTypePtr return_type = removeLowCardinality(data_type_function->getReturnType());
if (Impl::needBoolean() && !WhichDataType(return_type).isUInt8())
throw Exception("Expression for function " + getName() + " must return UInt8, found "
/// Special cases when we need boolean lambda result:
/// - lambda may return Nullable(UInt8) column, in this case after lambda execution we will
/// replace all NULLs with 0 and return nested UInt8 column.
/// - lambda may return Nothing or Nullable(Nothing) because of default implementation of functions
/// for these types. In this case we will just create UInt8 const column full of 0.
if (Impl::needBoolean() && !isUInt8(removeNullable(return_type)) && !isNothing(removeNullable(return_type)))
throw Exception("Expression for function " + getName() + " must return UInt8 or Nullable(UInt8), found "
+ return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
static_assert(
@ -316,11 +325,38 @@ public:
auto * replicated_column_function = typeid_cast<ColumnFunction *>(replicated_column_function_ptr.get());
replicated_column_function->appendArguments(arrays);
auto lambda_result = replicated_column_function->reduce().column;
if (lambda_result->lowCardinality())
lambda_result = lambda_result->convertToFullColumnIfLowCardinality();
auto lambda_result = replicated_column_function->reduce();
if (lambda_result.column->lowCardinality())
lambda_result.column = lambda_result.column->convertToFullColumnIfLowCardinality();
return Impl::execute(*column_first_array, lambda_result);
/// When the implementation needs a boolean lambda result, normalize lambda_result.column
/// to a UInt8 column before it is handed to Impl::execute.
if (Impl::needBoolean())
{
    /// If result column is Nothing or Nullable(Nothing), just create const UInt8 column with 0 value.
    if (isNothing(removeNullable(lambda_result.type)))
    {
        auto result_type = std::make_shared<DataTypeUInt8>();
        lambda_result.column = result_type->createColumnConst(lambda_result.column->size(), 0);
    }
    /// If result column is Nullable(UInt8), then extract nested column and write 0 in all rows
    /// when we have NULL.
    else if (lambda_result.column->isNullable())
    {
        auto result_column = IColumn::mutate(std::move(lambda_result.column));
        auto * column_nullable = assert_cast<ColumnNullable *>(result_column.get());
        auto & null_map = column_nullable->getNullMapData();
        auto nested_column = IColumn::mutate(std::move(column_nullable->getNestedColumnPtr()));
        auto & nested_data = assert_cast<ColumnUInt8 *>(nested_column.get())->getData();
        /// Rows marked in the null map are overwritten with 0.
        for (size_t i = 0; i != nested_data.size(); ++i)
        {
            if (null_map[i])
                nested_data[i] = 0;
        }
        /// Only the nested UInt8 column is kept as the lambda result.
        lambda_result.column = std::move(nested_column);
    }
}
return Impl::execute(*column_first_array, lambda_result.column);
}
}
};

View File

@ -20,6 +20,7 @@ public:
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isVariadic() const override { return true; }

View File

@ -33,6 +33,7 @@ public:
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; }

View File

@ -52,6 +52,7 @@ public:
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
@ -194,6 +195,7 @@ private:
bool isVariadic() const override { return true; }
bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); }
bool useDefaultImplementationForNothing() const override { return impl.useDefaultImplementationForNothing(); }
bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns();}
bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants();}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override

View File

@ -39,6 +39,8 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool isSuitableForConstantFolding() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

View File

@ -27,6 +27,8 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
size_t getNumberOfArguments() const override

View File

@ -65,6 +65,7 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override

View File

@ -23,6 +23,11 @@ public:
return false;
}
bool useDefaultImplementationForNothing() const override
{
return false;
}
/// Get the function name.
String getName() const override
{

View File

@ -26,6 +26,8 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
{
settings.enable_lazy_execution_for_first_argument = true;

View File

@ -28,6 +28,7 @@ public:
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

View File

@ -30,6 +30,8 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
{
settings.enable_lazy_execution_for_first_argument = false;

View File

@ -52,6 +52,7 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override

View File

@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "create table data_01882 (key Int) Engine=MergeTree() part
# thus check few times to be sure that this is not coincidence.
for _ in {1..30}; do
$CLICKHOUSE_CURL -vsS "${CLICKHOUSE_URL}&max_threads=1&default_format=Null&send_progress_in_http_headers=1&http_headers_progress_interval_ms=1" --data-binary @- <<< "select * from data_01882" |& {
grep -o -F '"total_rows_to_read":"10"'
grep -F '"total_rows_to_read"'
} | {
# grep out final result
grep -v -F '"read_rows":"10"'

View File

@ -4,3 +4,32 @@ Array(Nothing)
Array(Nothing)
[]
Array(Nothing)
Array(Nothing)
Array(Nothing)
[]
Array(Nothing)
Array(Nothing)
Array(Nothing)
Nothing
Const(Nothing)
Nothing
Const(Nothing)
Nothing
Nothing
Array(Nothing)
Const(Array(Nothing))
Array(Nothing)
Array(Nothing)
Map(UInt8, Nothing)
Const(Map(UInt8, Nothing))
Map(UInt8, Nothing)
Map(UInt8, Nothing)
Tuple(UInt8, Nothing)
Const(Tuple(UInt8, Nothing))
Tuple(UInt8, Nothing)
Tuple(UInt8, Nothing)
Nothing
Const(Nothing)
Nothing
Nothing

View File

@ -4,3 +4,40 @@ select arrayMap((x, y) -> x + y, [], []);
select toTypeName(arrayMap((x, y) -> x + y, [], []));
select arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)'));
select toTypeName(arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)')));
select toTypeName(arrayMap(x -> 2 * x, [assumeNotNull(NULL)]));
select toColumnTypeName(arrayMap(x -> 2 * x, [assumeNotNull(NULL)]));
select arrayFilter(x -> 2 * x < 0, []);
select toTypeName(arrayFilter(x -> 2 * x < 0, []));
select toTypeName(arrayFilter(x -> 2 * x < 0, [assumeNotNull(NULL)]));
select toColumnTypeName(arrayFilter(x -> 2 * x < 0, [assumeNotNull(NULL)]));
select CAST(assumeNotNull(NULL), 'String');
select toTypeName(toInt32(assumeNotNull(NULL)));
select toColumnTypeName(toInt32(assumeNotNull(NULL)));
select toTypeName(assumeNotNull(NULL));
select toColumnTypeName(assumeNotNull(NULL));
select toTypeName(assumeNotNull(materialize(NULL)));
select toColumnTypeName(assumeNotNull(materialize(NULL)));
select toTypeName([assumeNotNull(NULL)]);
select toColumnTypeName([assumeNotNull(NULL)]);
select toTypeName([assumeNotNull(materialize(NULL))]);
select toColumnTypeName([assumeNotNull(materialize(NULL))]);
select toTypeName(map(1, assumeNotNull(NULL)));
select toColumnTypeName(map(1, assumeNotNull(NULL)));
select toTypeName(map(1, assumeNotNull(materialize(NULL))));
select toColumnTypeName(map(1, assumeNotNull(materialize(NULL))));
select toTypeName(tuple(1, assumeNotNull(NULL)));
select toColumnTypeName(tuple(1, assumeNotNull(NULL)));
select toTypeName(tuple(1, assumeNotNull(materialize(NULL))));
select toColumnTypeName(tuple(1, assumeNotNull(materialize(NULL))));
select toTypeName(assumeNotNull(NULL) * 2);
select toColumnTypeName(assumeNotNull(NULL) * 2);
select toTypeName(assumeNotNull(materialize(NULL)) * 2);
select toColumnTypeName(assumeNotNull(materialize(NULL)) * 2);

View File

@ -0,0 +1,28 @@
-- Types and column types of expressions built on assumeNotNull(NULL),
-- both for the constant and for the materialized argument.
select CAST(assumeNotNull(NULL), 'String');
select toTypeName(assumeNotNull(NULL));
select toColumnTypeName(assumeNotNull(NULL));
select toTypeName(assumeNotNull(materialize(NULL)));
select toColumnTypeName(assumeNotNull(materialize(NULL)));
select toTypeName([assumeNotNull(NULL)]);
select toColumnTypeName([assumeNotNull(NULL)]);
select toTypeName([assumeNotNull(materialize(NULL))]);
select toColumnTypeName([assumeNotNull(materialize(NULL))]);
select toTypeName(map(1, assumeNotNull(NULL)));
select toColumnTypeName(map(1, assumeNotNull(NULL)));
select toTypeName(map(1, assumeNotNull(materialize(NULL))));
select toColumnTypeName(map(1, assumeNotNull(materialize(NULL))));
select toTypeName(tuple(1, assumeNotNull(NULL)));
select toColumnTypeName(tuple(1, assumeNotNull(NULL)));
select toTypeName(tuple(1, assumeNotNull(materialize(NULL))));
select toColumnTypeName(tuple(1, assumeNotNull(materialize(NULL))));
select toTypeName(assumeNotNull(NULL) * 2);
select toColumnTypeName(assumeNotNull(NULL) * 2);
select toTypeName(assumeNotNull(materialize(NULL)) * 2);
select toColumnTypeName(assumeNotNull(materialize(NULL)) * 2);

View File

@ -0,0 +1,4 @@
[]
[]
[2,4]
[1,3]

View File

@ -0,0 +1,4 @@
-- Filter over an empty array literal.
select arrayFilter(x -> 2 * x > 0, []);
-- Filter over an array whose only element is NULL.
select arrayFilter(x -> 2 * x > 0, [NULL]);
-- The lambda yields NULL for odd x and 1 for even x.
select arrayFilter(x -> x % 2 ? NULL : 1, [1, 2, 3, 4]);
-- The input array itself contains NULL elements; the lambda is x % 2.
select arrayFilter(x -> x % 2, [1, NULL, 3, NULL]);