Handle null column correctly in JSONFunction

This commit is contained in:
Amos Bird 2021-09-10 18:27:58 +08:00
parent 15e80ead3f
commit b25c3fb659
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
5 changed files with 153 additions and 37 deletions

View File

@ -58,21 +58,21 @@ size_t FunctionJSONHelpers::calculateMaxSize(const ColumnString::Offsets & offse
/// Registers the JSON* SQL functions with the given factory. Each registration pairs a
/// Name* tag type with the *Impl template that implements the function.
/// NOTE(review): every Name/Impl pair below is registered twice, once through
/// FunctionJSON<...> and once through JSONOverloadResolver<...>. This looks like the removed
/// and added sides of a diff hunk rendered without +/- markers; confirm which set is meant
/// to remain before relying on this listing.
void registerFunctionsJSON(FunctionFactory & factory)
{
/// Registrations that go through FunctionJSON.
factory.registerFunction<FunctionJSON<NameJSONHas, JSONHasImpl>>();
factory.registerFunction<FunctionJSON<NameIsValidJSON, IsValidJSONImpl>>();
factory.registerFunction<FunctionJSON<NameJSONLength, JSONLengthImpl>>();
factory.registerFunction<FunctionJSON<NameJSONKey, JSONKeyImpl>>();
factory.registerFunction<FunctionJSON<NameJSONType, JSONTypeImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractInt, JSONExtractInt64Impl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractUInt, JSONExtractUInt64Impl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractFloat, JSONExtractFloat64Impl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractBool, JSONExtractBoolImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractString, JSONExtractStringImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtract, JSONExtractImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractKeysAndValues, JSONExtractKeysAndValuesImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractRaw, JSONExtractRawImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractArrayRaw, JSONExtractArrayRawImpl>>();
factory.registerFunction<FunctionJSON<NameJSONExtractKeysAndValuesRaw, JSONExtractKeysAndValuesRawImpl>>();
/// Registrations of the same Name/Impl pairs that go through JSONOverloadResolver.
factory.registerFunction<JSONOverloadResolver<NameJSONHas, JSONHasImpl>>();
factory.registerFunction<JSONOverloadResolver<NameIsValidJSON, IsValidJSONImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONLength, JSONLengthImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONKey, JSONKeyImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONType, JSONTypeImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractInt, JSONExtractInt64Impl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractUInt, JSONExtractUInt64Impl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractFloat, JSONExtractFloat64Impl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractBool, JSONExtractBoolImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractString, JSONExtractStringImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtract, JSONExtractImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractKeysAndValues, JSONExtractKeysAndValuesImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractRaw, JSONExtractRawImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractArrayRaw, JSONExtractArrayRawImpl>>();
factory.registerFunction<JSONOverloadResolver<NameJSONExtractKeysAndValuesRaw, JSONExtractKeysAndValuesRawImpl>>();
}
}

View File

@ -26,9 +26,11 @@
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/Context.h>
#include <common/range.h>
#include <type_traits>
@ -276,37 +278,139 @@ private:
/// Execution stage of a JSON* function, parameterised by a Name tag and an Impl template.
/// NOTE(review): this span appears to interleave the lines of a `FunctionJSON : IFunction`
/// class with the lines of `ExecutableFunctionJSON : IExecutableFunction` (two class headers,
/// `return Executor::run(arguments, ...)` next to `temporary_result = Executor::run(temporary_columns, ...)`).
/// It looks like both sides of a diff hunk rendered without +/- markers; the comments below
/// describe the statements as written and should be re-checked against the real file.
template <typename Name, template<typename> typename Impl>
class FunctionJSON : public IFunction, WithContext
class ExecutableFunctionJSON : public IExecutableFunction, WithContext
{
public:
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionJSON>(context_); }
FunctionJSON(ContextPtr context_) : WithContext(context_) {}
static constexpr auto name = Name::name;
String getName() const override { return Name::name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
/// Stores the null-presence summary of the arguments and whether simdjson may be used.
explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_)
: null_presence(null_presence_), allow_simdjson(allow_simdjson_)
{
return Impl<DummyJSONParser>::getReturnType(Name::name, arguments);
}
String getName() const override { return Name::name; }
/// Returns false: executeImpl below inspects null_presence itself.
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
/// Runs Impl over the argument columns with the selected JSON parser and returns the result column.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
/// A constant NULL argument short-circuits: the result is a constant column holding
/// result_type's default value, and no parsing is done.
if (null_presence.has_null_constant)
return result_type->createColumnConstWithDefaultValue(input_rows_count);
/// With a Nullable argument present, Impl is run on createBlockWithNestedColumns(arguments)
/// (presumably the arguments with their Nullable wrappers removed - confirm against
/// FunctionHelpers.h); otherwise on the arguments unchanged.
ColumnsWithTypeAndName temporary_columns;
if (null_presence.has_nullable)
temporary_columns = createBlockWithNestedColumns(arguments);
else
temporary_columns = arguments;
ColumnPtr temporary_result;
/// Choose JSONParser.
/// SimdJSONParser is used when USE_SIMDJSON is set and simdjson is allowed; otherwise
/// RapidJSONParser when USE_RAPIDJSON is set, and DummyJSONParser as the last resort.
#if USE_SIMDJSON
if (getContext()->getSettingsRef().allow_simdjson)
return FunctionJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count);
if (allow_simdjson)
temporary_result = FunctionJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(
temporary_columns, result_type, input_rows_count);
else
#endif
{
#if USE_RAPIDJSON
return FunctionJSONHelpers::Executor<Name, Impl, RapidJSONParser>::run(arguments, result_type, input_rows_count);
temporary_result = FunctionJSONHelpers::Executor<Name, Impl, RapidJSONParser>::run(
temporary_columns, result_type, input_rows_count);
#else
return FunctionJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count);
temporary_result
= FunctionJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(temporary_columns, result_type, input_rows_count);
#endif
}
/// When a Nullable argument was present, the result is passed through wrapInNullable together
/// with the original (still Nullable) arguments.
if (null_presence.has_nullable)
return wrapInNullable(temporary_result, arguments, result_type, input_rows_count);
return temporary_result;
}
private:
/// Null-presence summary of the arguments, supplied by the caller at construction.
NullPresence null_presence;
/// Whether SimdJSONParser may be selected in executeImpl.
bool allow_simdjson;
};
/// IFunctionBase stage of a JSON* function.
///
/// Holds the already-resolved argument types and result type, plus the two flags
/// (null presence of the arguments, simdjson permission) that are handed on to
/// ExecutableFunctionJSON when the function is prepared for execution.
template <typename Name, template<typename> typename Impl>
class FunctionBaseFunctionJSON : public IFunctionBase
{
public:
    /// All four values are stored as given; the types are taken by value and moved into place.
    explicit FunctionBaseFunctionJSON(
        const NullPresence & null_presence_, bool allow_simdjson_, DataTypes argument_types_, DataTypePtr return_type_)
        : null_presence(null_presence_)
        , allow_simdjson(allow_simdjson_)
        , argument_types(std::move(argument_types_))
        , return_type(std::move(return_type_))
    {
    }

    String getName() const override
    {
        return Name::name;
    }

    const DataTypes & getArgumentTypes() const override { return argument_types; }

    const DataTypePtr & getResultType() const override { return return_type; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
    {
        return true;
    }

    /// The sample columns are not consulted: the executable only needs the stored flags.
    ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override
    {
        using Executable = ExecutableFunctionJSON<Name, Impl>;
        return std::make_unique<Executable>(null_presence, allow_simdjson);
    }

private:
    NullPresence null_presence;
    bool allow_simdjson;
    DataTypes argument_types;
    DataTypePtr return_type;
};
/// Overload resolver for the JSON* functions.
///
/// Overrides build() directly so that the result type can be chosen from the
/// null presence of the arguments before a FunctionBaseFunctionJSON is created.
template <typename Name, template<typename> typename Impl>
class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext
{
public:
    static constexpr auto name = Name::name;

    String getName() const override { return name; }

    static FunctionOverloadResolverPtr create(ContextPtr context_)
    {
        return std::make_unique<JSONOverloadResolver>(context_);
    }

    explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {}

    /// Any number of arguments is accepted.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }

    // Both NULL and JSON NULL should generate NULL value.
    // If any argument is NULL, return NULL.
    bool useDefaultImplementationForNulls() const override { return false; }

    /// Builds the function for the given arguments.
    ///
    /// Result type:
    ///  - a constant NULL argument          -> Nullable(Nothing);
    ///  - otherwise, any Nullable argument  -> Nullable(T), where T is Impl's return type
    ///                                         for createBlockWithNestedColumns(arguments);
    ///  - otherwise                         -> Impl's return type for the arguments as given.
    FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override
    {
        const NullPresence null_presence = getNullPresense(arguments);

        DataTypePtr return_type;
        if (null_presence.has_null_constant)
            return_type = makeNullable(std::make_shared<DataTypeNothing>());
        else if (null_presence.has_nullable)
            return_type = makeNullable(Impl<DummyJSONParser>::getReturnType(Name::name, createBlockWithNestedColumns(arguments)));
        else
            return_type = Impl<DummyJSONParser>::getReturnType(Name::name, arguments);

        DataTypes argument_types;
        argument_types.reserve(arguments.size());
        for (const auto & argument : arguments)
            argument_types.emplace_back(argument.type);

        /// Neither local is used after this point, so both are moved into the by-value
        /// constructor parameters instead of being copied.
        return std::make_unique<FunctionBaseFunctionJSON<Name, Impl>>(
            null_presence, getContext()->getSettingsRef().allow_simdjson, std::move(argument_types), std::move(return_type));
    }
};

View File

@ -278,9 +278,7 @@ class IFunctionOverloadResolver
public:
virtual ~IFunctionOverloadResolver() = default;
FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const;
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const;
virtual FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const;
void getLambdaArgumentTypes(DataTypes & arguments) const;
@ -322,7 +320,10 @@ public:
protected:
virtual FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const = 0;
virtual FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const
{
throw Exception("buildImpl is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const
{
@ -360,6 +361,8 @@ protected:
private:
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const;
DataTypePtr getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const;
};

View File

@ -0,0 +1 @@
\N \N \N

View File

@ -0,0 +1,8 @@
-- Checks JSONExtract(..., 'string_value', 'Nullable(String)') on a JSON null value read from
-- a String column, a Nullable(String) column, and a Nullable(String) column passed through
-- assumeNotNull.
DROP TABLE IF EXISTS test_table;

CREATE TABLE test_table
(
    col String,
    col_nullable Nullable(String)
)
ENGINE = MergeTree
ORDER BY col;

INSERT INTO test_table
SELECT
    '{"string_value":null}' AS col,
    '{"string_value":null}' AS col_nullable;

SELECT
    JSONExtract(col, 'string_value', 'Nullable(String)') AS res1,
    JSONExtract(col_nullable, 'string_value', 'Nullable(String)') AS res2,
    JSONExtract(assumeNotNull(col_nullable), 'string_value', 'Nullable(String)') AS res3
FROM test_table;

DROP TABLE test_table;