2021-10-15 10:12:59 +00:00
# include <type_traits>
2021-10-14 20:10:53 +00:00
# include <boost/tti/has_member_function.hpp>
# include <base/range.h>
# include <Common/CpuId.h>
# include <Common/typeid_cast.h>
# include <Common/assert_cast.h>
# include <Core/AccurateComparison.h>
# include <Core/Settings.h>
# include <Columns/ColumnConst.h>
# include <Columns/ColumnLowCardinality.h>
# include <Columns/ColumnDecimal.h>
# include <Columns/ColumnString.h>
# include <Columns/ColumnVector.h>
# include <Columns/ColumnFixedString.h>
# include <Columns/ColumnNullable.h>
# include <Columns/ColumnArray.h>
# include <Columns/ColumnTuple.h>
# include <DataTypes/Serializations/SerializationDecimal.h>
# include <DataTypes/DataTypesNumber.h>
# include <DataTypes/DataTypeLowCardinality.h>
# include <DataTypes/DataTypeString.h>
# include <DataTypes/DataTypesDecimal.h>
# include <DataTypes/DataTypeUUID.h>
# include <DataTypes/DataTypeEnum.h>
# include <DataTypes/DataTypeFactory.h>
# include <DataTypes/DataTypeNothing.h>
# include <DataTypes/DataTypeNullable.h>
# include <DataTypes/DataTypeArray.h>
# include <DataTypes/DataTypeTuple.h>
2019-05-16 19:39:42 +00:00
# include <Functions/FunctionFactory.h>
2021-10-14 20:10:53 +00:00
# include <Functions/IFunction.h>
2021-11-09 12:36:25 +00:00
# include <Common/JSONParsers/DummyJSONParser.h>
# include <Common/JSONParsers/SimdJSONParser.h>
# include <Common/JSONParsers/RapidJSONParser.h>
2021-10-14 20:10:53 +00:00
# include <Functions/FunctionHelpers.h>
# include <Interpreters/Context.h>
2021-10-27 23:10:39 +00:00
# include "config_functions.h"
2019-03-14 02:55:04 +00:00
2019-05-07 23:31:35 +00:00
namespace DB
{
2021-10-14 20:10:53 +00:00
2020-07-11 21:04:22 +00:00
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT ;
2021-10-14 20:10:53 +00:00
extern const int ILLEGAL_COLUMN ;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH ;
2020-07-11 21:04:22 +00:00
}
2021-10-15 20:26:09 +00:00
/// Satisfied by types whose values can be subscripted with an integer literal, i.e. `t[0]` is a valid expression.
/// Used further down to check whether a parser's Object type supports access by position.
template < typename T >
concept HasIndexOperator = requires ( T t )
{
t [ 0 ] ;
} ;
2020-07-11 21:04:22 +00:00
2021-10-14 20:10:53 +00:00
/// Functions to parse JSON documents and extract values from them.
/// The first argument of all these functions gets a JSON,
/// after that there are any number of arguments specifying path to a desired part from the JSON's root.
/// For example,
/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100
2020-07-11 21:04:22 +00:00
2021-10-14 20:10:53 +00:00
class FunctionJSONHelpers
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
public :
template < typename Name , template < typename > typename Impl , class JSONParser >
class Executor
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
public :
static ColumnPtr run ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & result_type , size_t input_rows_count )
{
MutableColumnPtr to { result_type - > createColumn ( ) } ;
to - > reserve ( input_rows_count ) ;
if ( arguments . empty ( ) )
throw Exception { " Function " + String ( Name : : name ) + " requires at least one argument " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
const auto & first_column = arguments [ 0 ] ;
if ( ! isString ( first_column . type ) )
throw Exception { " The first argument of function " + String ( Name : : name ) + " should be a string containing JSON, illegal type: " + first_column . type - > getName ( ) ,
ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
const ColumnPtr & arg_json = first_column . column ;
const auto * col_json_const = typeid_cast < const ColumnConst * > ( arg_json . get ( ) ) ;
const auto * col_json_string
= typeid_cast < const ColumnString * > ( col_json_const ? col_json_const - > getDataColumnPtr ( ) . get ( ) : arg_json . get ( ) ) ;
if ( ! col_json_string )
throw Exception { " Illegal column " + arg_json - > getName ( ) , ErrorCodes : : ILLEGAL_COLUMN } ;
const ColumnString : : Chars & chars = col_json_string - > getChars ( ) ;
const ColumnString : : Offsets & offsets = col_json_string - > getOffsets ( ) ;
size_t num_index_arguments = Impl < JSONParser > : : getNumberOfIndexArguments ( arguments ) ;
std : : vector < Move > moves = prepareMoves ( Name : : name , arguments , 1 , num_index_arguments ) ;
/// Preallocate memory in parser if necessary.
JSONParser parser ;
if constexpr ( has_member_function_reserve < void ( JSONParser : : * ) ( size_t ) > : : value )
{
size_t max_size = calculateMaxSize ( offsets ) ;
if ( max_size )
parser . reserve ( max_size ) ;
}
Impl < JSONParser > impl ;
/// prepare() does Impl-specific preparation before handling each row.
if constexpr ( has_member_function_prepare < void ( Impl < JSONParser > : : * ) ( const char * , const ColumnsWithTypeAndName & , const DataTypePtr & ) > : : value )
impl . prepare ( Name : : name , arguments , result_type ) ;
using Element = typename JSONParser : : Element ;
Element document ;
bool document_ok = false ;
if ( col_json_const )
{
2022-05-27 20:51:37 +00:00
std : : string_view json { reinterpret_cast < const char * > ( chars . data ( ) ) , offsets [ 0 ] - 1 } ;
2021-10-14 20:10:53 +00:00
document_ok = parser . parse ( json , document ) ;
}
for ( const auto i : collections : : range ( 0 , input_rows_count ) )
{
if ( ! col_json_const )
{
std : : string_view json { reinterpret_cast < const char * > ( & chars [ offsets [ i - 1 ] ] ) , offsets [ i ] - offsets [ i - 1 ] - 1 } ;
document_ok = parser . parse ( json , document ) ;
}
bool added_to_column = false ;
if ( document_ok )
{
/// Perform moves.
Element element ;
std : : string_view last_key ;
bool moves_ok = performMoves < JSONParser > ( arguments , i , document , moves , element , last_key ) ;
2020-07-11 21:04:22 +00:00
2021-10-14 20:10:53 +00:00
if ( moves_ok )
added_to_column = impl . insertResultToColumn ( * to , element , last_key ) ;
}
/// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions.
if ( ! added_to_column )
to - > insertDefault ( ) ;
}
return to ;
}
} ;
private :
/// Generate the traits has_member_function_reserve<> and has_member_function_prepare<>,
/// which Executor::run() uses to detect the optional `reserve` member of a parser
/// and the optional `prepare` member of an Impl.
BOOST_TTI_HAS_MEMBER_FUNCTION ( reserve )
BOOST_TTI_HAS_MEMBER_FUNCTION ( prepare )
/// Represents a move of a JSON iterator described by a single argument passed to a JSON function.
/// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1)
/// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}.
/// Keys and indices can be nonconst, in this case they are calculated for each row.
enum class MoveType
{
/// String argument that is not a constant column: the key is read from the argument for every row.
Key ,
/// Integer argument that is not a constant column: the index is read from the argument for every row.
Index ,
/// Constant string argument: the key is stored in the Move itself.
ConstKey ,
/// Constant integer argument: the index is stored in the Move itself.
ConstIndex ,
} ;
struct Move
{
2021-10-15 07:05:39 +00:00
explicit Move ( MoveType type_ , size_t index_ = 0 ) : type ( type_ ) , index ( index_ ) { }
2021-10-14 20:10:53 +00:00
Move ( MoveType type_ , const String & key_ ) : type ( type_ ) , key ( key_ ) { }
MoveType type ;
size_t index = 0 ;
String key ;
} ;
static std : : vector < FunctionJSONHelpers : : Move > prepareMoves (
const char * function_name ,
const ColumnsWithTypeAndName & columns ,
size_t first_index_argument ,
size_t num_index_arguments )
{
std : : vector < Move > moves ;
moves . reserve ( num_index_arguments ) ;
for ( const auto i : collections : : range ( first_index_argument , first_index_argument + num_index_arguments ) )
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
const auto & column = columns [ i ] ;
if ( ! isString ( column . type ) & & ! isInteger ( column . type ) )
throw Exception { " The argument " + std : : to_string ( i + 1 ) + " of function " + String ( function_name )
+ " should be a string specifying key or an integer specifying index, illegal type: " + column . type - > getName ( ) ,
ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
if ( column . column & & isColumnConst ( * column . column ) )
{
const auto & column_const = assert_cast < const ColumnConst & > ( * column . column ) ;
if ( isString ( column . type ) )
moves . emplace_back ( MoveType : : ConstKey , column_const . getValue < String > ( ) ) ;
else
moves . emplace_back ( MoveType : : ConstIndex , column_const . getInt ( 0 ) ) ;
}
2020-07-11 21:04:22 +00:00
else
2021-10-14 20:10:53 +00:00
{
if ( isString ( column . type ) )
moves . emplace_back ( MoveType : : Key , " " ) ;
else
moves . emplace_back ( MoveType : : Index , 0 ) ;
}
}
return moves ;
}
/// Performs moves of types MoveType::Index and MoveType::ConstIndex.
template < typename JSONParser >
static bool performMoves ( const ColumnsWithTypeAndName & arguments , size_t row ,
const typename JSONParser : : Element & document , const std : : vector < Move > & moves ,
typename JSONParser : : Element & element , std : : string_view & last_key )
{
typename JSONParser : : Element res_element = document ;
std : : string_view key ;
for ( size_t j = 0 ; j ! = moves . size ( ) ; + + j )
{
switch ( moves [ j ] . type )
{
case MoveType : : ConstIndex :
{
if ( ! moveToElementByIndex < JSONParser > ( res_element , moves [ j ] . index , key ) )
return false ;
break ;
}
case MoveType : : ConstKey :
{
key = moves [ j ] . key ;
if ( ! moveToElementByKey < JSONParser > ( res_element , key ) )
return false ;
break ;
}
case MoveType : : Index :
{
Int64 index = ( * arguments [ j + 1 ] . column ) [ row ] . get < Int64 > ( ) ;
if ( ! moveToElementByIndex < JSONParser > ( res_element , index , key ) )
return false ;
break ;
}
case MoveType : : Key :
{
key = std : : string_view { ( * arguments [ j + 1 ] . column ) . getDataAt ( row ) } ;
if ( ! moveToElementByKey < JSONParser > ( res_element , key ) )
return false ;
break ;
}
}
}
element = res_element ;
last_key = key ;
return true ;
}
2020-07-11 21:04:22 +00:00
2021-10-14 20:10:53 +00:00
template < typename JSONParser >
static bool moveToElementByIndex ( typename JSONParser : : Element & element , int index , std : : string_view & out_key )
{
if ( element . isArray ( ) )
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
auto array = element . getArray ( ) ;
if ( index > = 0 )
- - index ;
2020-07-11 21:04:22 +00:00
else
2021-10-14 20:10:53 +00:00
index + = array . size ( ) ;
if ( static_cast < size_t > ( index ) > = array . size ( ) )
return false ;
element = array [ index ] ;
out_key = { } ;
return true ;
2020-07-11 21:04:22 +00:00
}
2021-10-14 20:10:53 +00:00
2021-10-15 20:26:09 +00:00
if constexpr ( HasIndexOperator < typename JSONParser : : Object > )
2021-10-14 20:10:53 +00:00
{
if ( element . isObject ( ) )
{
auto object = element . getObject ( ) ;
if ( index > = 0 )
- - index ;
else
index + = object . size ( ) ;
if ( static_cast < size_t > ( index ) > = object . size ( ) )
return false ;
std : : tie ( out_key , element ) = object [ index ] ;
return true ;
}
}
return { } ;
}
/// Performs moves of types MoveType::Key and MoveType::ConstKey:
/// if `element` is an object, looks up `key` in it and lets the lookup store the found member into `element`.
/// Returns false if `element` is not an object, otherwise the result of the lookup.
template <typename JSONParser>
static bool moveToElementByKey(typename JSONParser::Element & element, const std::string_view & key)
{
    if (element.isObject())
    {
        auto object = element.getObject();
        return object.find(key, element);
    }
    return false;
}
static size_t calculateMaxSize ( const ColumnString : : Offsets & offsets )
{
size_t max_size = 0 ;
for ( const auto i : collections : : range ( 0 , offsets . size ( ) ) )
{
size_t size = offsets [ i ] - offsets [ i - 1 ] ;
if ( max_size < size )
max_size = size ;
}
if ( max_size )
- - max_size ;
return max_size ;
}
} ;
/// Executable part of a JSON function.
/// Handles NULLs on its own: a NULL constant among the arguments gives a constant default result,
/// Nullable arguments are unwrapped before the JSON parser runs and the result is wrapped back afterwards.
template <typename Name, template <typename> typename Impl>
class ExecutableFunctionJSON : public IExecutableFunction, WithContext
{
public:
    explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_)
        : null_presence(null_presence_)
        , allow_simdjson(allow_simdjson_)
        , json_return_type(json_return_type_)
    {
    }

    String getName() const override { return Name::name; }

    bool useDefaultImplementationForNulls() const override { return false; }

    bool useDefaultImplementationForConstants() const override { return true; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        if (null_presence.has_null_constant)
            return result_type->createColumnConstWithDefaultValue(input_rows_count);

        /// Without Nullable arguments the parser can work on the arguments directly.
        if (!null_presence.has_nullable)
            return chooseAndRunJSONParser(arguments, json_return_type, input_rows_count);

        ColumnsWithTypeAndName nested_columns = createBlockWithNestedColumns(arguments);
        ColumnPtr nested_result = chooseAndRunJSONParser(nested_columns, json_return_type, input_rows_count);
        return wrapInNullable(nested_result, arguments, result_type, input_rows_count);
    }

private:
    /// Picks the parser: simdjson if it is compiled in and allowed, otherwise RapidJSON if it is compiled in,
    /// otherwise the dummy parser.
    ColumnPtr
    chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
    {
#if USE_SIMDJSON
        if (allow_simdjson)
            return FunctionJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count);
#endif

#if USE_RAPIDJSON
        using FallbackParser = RapidJSONParser;
#else
        using FallbackParser = DummyJSONParser;
#endif
        return FunctionJSONHelpers::Executor<Name, Impl, FallbackParser>::run(arguments, result_type, input_rows_count);
    }

    NullPresence null_presence;
    bool allow_simdjson;
    DataTypePtr json_return_type;
};
/// Resolved JSON function: remembers the argument types, the result type visible to the caller (`return_type`)
/// and the type that is passed on to the executable function (`json_return_type`).
template <typename Name, template <typename> typename Impl>
class FunctionBaseFunctionJSON : public IFunctionBase
{
public:
    explicit FunctionBaseFunctionJSON(
        const NullPresence & null_presence_,
        bool allow_simdjson_,
        DataTypes argument_types_,
        DataTypePtr return_type_,
        DataTypePtr json_return_type_)
        : null_presence(null_presence_)
        , allow_simdjson(allow_simdjson_)
        , argument_types(std::move(argument_types_))
        , return_type(std::move(return_type_))
        , json_return_type(std::move(json_return_type_))
    {
    }

    String getName() const override
    {
        return Name::name;
    }

    const DataTypes & getArgumentTypes() const override { return argument_types; }

    const DataTypePtr & getResultType() const override { return return_type; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
    {
        return true;
    }

    /// The executable function gets everything it needs from this object, the sample columns are not used.
    ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override
    {
        return std::make_unique<ExecutableFunctionJSON<Name, Impl>>(null_presence, allow_simdjson, json_return_type);
    }

private:
    NullPresence null_presence;
    bool allow_simdjson;
    DataTypes argument_types;
    DataTypePtr return_type;
    DataTypePtr json_return_type;
};
/// IFunctionOverloadResolver is used instead of IFunction because NULLs need non-default processing here:
/// both an SQL NULL and a JSON null should produce a NULL value, and a NULL in any argument makes the result NULL.
template <typename Name, template <typename> typename Impl>
class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext
{
public:
    static constexpr auto name = Name::name;

    explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {}

    static FunctionOverloadResolverPtr create(ContextPtr context_)
    {
        return std::make_unique<JSONOverloadResolver>(context_);
    }

    String getName() const override { return name; }

    bool isVariadic() const override { return true; }

    size_t getNumberOfArguments() const override { return 0; }

    bool useDefaultImplementationForNulls() const override { return false; }

    /// Deduces the result type from the arguments and creates the function object.
    FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override
    {
        bool has_nothing_argument = false;
        for (const auto & argument : arguments)
        {
            if (isNothing(argument.type))
                has_nothing_argument = true;
        }

        DataTypePtr json_return_type = Impl<DummyJSONParser>::getReturnType(Name::name, createBlockWithNestedColumns(arguments));
        const NullPresence null_presence = getNullPresense(arguments);

        /// By default the function returns exactly what Impl reports; the special cases are checked in priority order.
        DataTypePtr return_type = json_return_type;
        if (has_nothing_argument)
            return_type = std::make_shared<DataTypeNothing>();
        else if (null_presence.has_null_constant)
            return_type = makeNullable(std::make_shared<DataTypeNothing>());
        else if (null_presence.has_nullable)
            return_type = makeNullable(json_return_type);

        /// Top-level LowCardinality columns are processed outside JSON parser.
        json_return_type = removeLowCardinality(json_return_type);

        DataTypes argument_types;
        argument_types.reserve(arguments.size());
        for (const auto & argument : arguments)
            argument_types.emplace_back(argument.type);

        return std::make_unique<FunctionBaseFunctionJSON<Name, Impl>>(
            null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type);
    }
};
/// Name tags: each struct only carries the name of one JSON function and is used as the `Name` template argument.
struct NameJSONHas
{
    static constexpr auto name = " JSONHas ";
};
struct NameIsValidJSON
{
    static constexpr auto name = " isValidJSON ";
};
struct NameJSONLength
{
    static constexpr auto name = " JSONLength ";
};
struct NameJSONKey
{
    static constexpr auto name = " JSONKey ";
};
struct NameJSONType
{
    static constexpr auto name = " JSONType ";
};
struct NameJSONExtractInt
{
    static constexpr auto name = " JSONExtractInt ";
};
struct NameJSONExtractUInt
{
    static constexpr auto name = " JSONExtractUInt ";
};
struct NameJSONExtractFloat
{
    static constexpr auto name = " JSONExtractFloat ";
};
struct NameJSONExtractBool
{
    static constexpr auto name = " JSONExtractBool ";
};
struct NameJSONExtractString
{
    static constexpr auto name = " JSONExtractString ";
};
struct NameJSONExtract
{
    static constexpr auto name = " JSONExtract ";
};
struct NameJSONExtractKeysAndValues
{
    static constexpr auto name = " JSONExtractKeysAndValues ";
};
struct NameJSONExtractRaw
{
    static constexpr auto name = " JSONExtractRaw ";
};
struct NameJSONExtractArrayRaw
{
    static constexpr auto name = " JSONExtractArrayRaw ";
};
struct NameJSONExtractKeysAndValuesRaw
{
    static constexpr auto name = " JSONExtractKeysAndValuesRaw ";
};
struct NameJSONExtractKeys
{
    static constexpr auto name = " JSONExtractKeys ";
};
template < typename JSONParser >
class JSONHasImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & ) { return std : : make_shared < DataTypeUInt8 > ( ) ; }
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & , const std : : string_view & )
{
ColumnVector < UInt8 > & col_vec = assert_cast < ColumnVector < UInt8 > & > ( dest ) ;
col_vec . insertValue ( 1 ) ;
return true ;
}
} ;
template < typename JSONParser >
class IsValidJSONImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * function_name , const ColumnsWithTypeAndName & arguments )
{
if ( arguments . size ( ) ! = 1 )
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
/// IsValidJSON() shouldn't get parameters other than JSON.
throw Exception { " Function " + String ( function_name ) + " needs exactly one argument " ,
ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
}
return std : : make_shared < DataTypeUInt8 > ( ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & ) { return 0 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & , const std : : string_view & )
{
/// This function is called only if JSON is valid.
/// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function.
ColumnVector < UInt8 > & col_vec = assert_cast < ColumnVector < UInt8 > & > ( dest ) ;
col_vec . insertValue ( 1 ) ;
return true ;
}
} ;
template < typename JSONParser >
class JSONLengthImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
return std : : make_shared < DataTypeUInt64 > ( ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
size_t size ;
if ( element . isArray ( ) )
size = element . getArray ( ) . size ( ) ;
else if ( element . isObject ( ) )
size = element . getObject ( ) . size ( ) ;
else
return false ;
ColumnVector < UInt64 > & col_vec = assert_cast < ColumnVector < UInt64 > & > ( dest ) ;
col_vec . insertValue ( size ) ;
return true ;
}
} ;
template < typename JSONParser >
class JSONKeyImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
return std : : make_shared < DataTypeString > ( ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & , const std : : string_view & last_key )
{
if ( last_key . empty ( ) )
return false ;
ColumnString & col_str = assert_cast < ColumnString & > ( dest ) ;
col_str . insertData ( last_key . data ( ) , last_key . size ( ) ) ;
return true ;
}
} ;
template < typename JSONParser >
class JSONTypeImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
static const std : : vector < std : : pair < String , Int8 > > values = {
{ " Array " , ' [ ' } ,
{ " Object " , ' { ' } ,
{ " String " , ' " ' } ,
{ " Int64 " , ' i ' } ,
{ " UInt64 " , ' u ' } ,
{ " Double " , ' d ' } ,
{ " Bool " , ' b ' } ,
{ " Null " , 0 } , /// the default value for the column.
} ;
return std : : make_shared < DataTypeEnum < Int8 > > ( values ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
UInt8 type ;
if ( element . isInt64 ( ) )
type = ' i ' ;
else if ( element . isUInt64 ( ) )
type = ' u ' ;
else if ( element . isDouble ( ) )
type = ' d ' ;
else if ( element . isBool ( ) )
type = ' b ' ;
else if ( element . isString ( ) )
type = ' " ' ;
else if ( element . isArray ( ) )
type = ' [ ' ;
else if ( element . isObject ( ) )
type = ' { ' ;
else if ( element . isNull ( ) )
type = 0 ;
else
return false ;
ColumnVector < Int8 > & col_vec = assert_cast < ColumnVector < Int8 > & > ( dest ) ;
col_vec . insertValue ( type ) ;
return true ;
}
} ;
template < typename JSONParser , typename NumberType , bool convert_bool_to_integer = false >
class JSONExtractNumericImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
return std : : make_shared < DataTypeNumber < NumberType > > ( ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
NumberType value ;
if ( element . isInt64 ( ) )
{
if ( ! accurate : : convertNumeric ( element . getInt64 ( ) , value ) )
return false ;
}
else if ( element . isUInt64 ( ) )
{
if ( ! accurate : : convertNumeric ( element . getUInt64 ( ) , value ) )
return false ;
}
else if ( element . isDouble ( ) )
{
if constexpr ( std : : is_floating_point_v < NumberType > )
{
/// We permit inaccurate conversion of double to float.
/// Example: double 0.1 from JSON is not representable in float.
/// But it will be more convenient for user to perform conversion.
value = element . getDouble ( ) ;
}
else if ( ! accurate : : convertNumeric ( element . getDouble ( ) , value ) )
return false ;
}
else if ( element . isBool ( ) & & is_integer < NumberType > & & convert_bool_to_integer )
{
value = static_cast < NumberType > ( element . getBool ( ) ) ;
2020-07-11 21:04:22 +00:00
}
else
2021-10-14 20:10:53 +00:00
return false ;
auto & col_vec = assert_cast < ColumnVector < NumberType > & > ( dest ) ;
col_vec . insertValue ( value ) ;
return true ;
}
} ;
/// Shorthands for JSONExtractNumericImpl with a fixed number type.
/// They take the parser as the only template parameter and leave `convert_bool_to_integer` at its default (false).
template < typename JSONParser >
using JSONExtractInt8Impl = JSONExtractNumericImpl < JSONParser , Int8 > ;
template < typename JSONParser >
using JSONExtractUInt8Impl = JSONExtractNumericImpl < JSONParser , UInt8 > ;
template < typename JSONParser >
using JSONExtractInt16Impl = JSONExtractNumericImpl < JSONParser , Int16 > ;
template < typename JSONParser >
using JSONExtractUInt16Impl = JSONExtractNumericImpl < JSONParser , UInt16 > ;
template < typename JSONParser >
using JSONExtractInt32Impl = JSONExtractNumericImpl < JSONParser , Int32 > ;
template < typename JSONParser >
using JSONExtractUInt32Impl = JSONExtractNumericImpl < JSONParser , UInt32 > ;
template < typename JSONParser >
using JSONExtractInt64Impl = JSONExtractNumericImpl < JSONParser , Int64 > ;
template < typename JSONParser >
using JSONExtractUInt64Impl = JSONExtractNumericImpl < JSONParser , UInt64 > ;
template < typename JSONParser >
using JSONExtractFloat32Impl = JSONExtractNumericImpl < JSONParser , Float32 > ;
template < typename JSONParser >
using JSONExtractFloat64Impl = JSONExtractNumericImpl < JSONParser , Float64 > ;
template < typename JSONParser >
class JSONExtractBoolImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
return std : : make_shared < DataTypeUInt8 > ( ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
if ( ! element . isBool ( ) )
return false ;
auto & col_vec = assert_cast < ColumnVector < UInt8 > & > ( dest ) ;
col_vec . insertValue ( static_cast < UInt8 > ( element . getBool ( ) ) ) ;
return true ;
}
} ;
2021-10-20 03:52:48 +00:00
/// Forward declaration: JSONExtractStringImpl delegates to JSONExtractRawImpl for elements that are not strings.
template < typename JSONParser >
class JSONExtractRawImpl ;
2021-10-14 20:10:53 +00:00
template < typename JSONParser >
class JSONExtractStringImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
return std : : make_shared < DataTypeString > ( ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
2021-10-20 03:52:48 +00:00
if ( element . isNull ( ) )
2021-10-14 20:10:53 +00:00
return false ;
2021-10-20 03:52:48 +00:00
if ( ! element . isString ( ) )
return JSONExtractRawImpl < JSONParser > : : insertResultToColumn ( dest , element , { } ) ;
2021-10-14 20:10:53 +00:00
auto str = element . getString ( ) ;
ColumnString & col_str = assert_cast < ColumnString & > ( dest ) ;
col_str . insertData ( str . data ( ) , str . size ( ) ) ;
return true ;
}
} ;
/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables.
template < typename JSONParser >
struct JSONExtractTree
{
using Element = typename JSONParser : : Element ;
/// Base class of the extract tree nodes. Every node knows how to convert one JSON element
/// into a value of one particular column type.
class Node
{
public :
Node ( ) = default ;
virtual ~ Node ( ) = default ;
/// Tries to append the value extracted from the element to the column.
/// The implementations below return true if a value was inserted and false otherwise.
virtual bool insertResultToColumn ( IColumn & , const Element & ) = 0 ;
} ;
template < typename NumberType >
class NumericNode : public Node
{
public :
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
return JSONExtractNumericImpl < JSONParser , NumberType , true > : : insertResultToColumn ( dest , element , { } ) ;
}
} ;
class LowCardinalityNode : public Node
{
public :
LowCardinalityNode ( DataTypePtr dictionary_type_ , std : : unique_ptr < Node > impl_ )
: dictionary_type ( dictionary_type_ ) , impl ( std : : move ( impl_ ) ) { }
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
auto from_col = dictionary_type - > createColumn ( ) ;
if ( impl - > insertResultToColumn ( * from_col , element ) )
{
StringRef value = from_col - > getDataAt ( 0 ) ;
assert_cast < ColumnLowCardinality & > ( dest ) . insertData ( value . data , value . size ) ;
return true ;
}
return false ;
}
private :
DataTypePtr dictionary_type ;
std : : unique_ptr < Node > impl ;
} ;
/// Extracts a UUID from a JSON string; elements of other kinds are rejected.
class UUIDNode : public Node
{
public:
    bool insertResultToColumn(IColumn & dest, const Element & element) override
    {
        if (element.isString())
        {
            auto uuid = parseFromString<UUID>(element.getString());
            assert_cast<ColumnUUID &>(dest).insert(uuid);
            return true;
        }
        return false;
    }
};
template < typename DecimalType >
class DecimalNode : public Node
{
public :
2021-10-15 07:05:39 +00:00
explicit DecimalNode ( DataTypePtr data_type_ ) : data_type ( data_type_ ) { }
2021-10-14 20:10:53 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
if ( ! element . isDouble ( ) )
return false ;
const auto * type = assert_cast < const DataTypeDecimal < DecimalType > * > ( data_type . get ( ) ) ;
auto result = convertToDecimal < DataTypeNumber < Float64 > , DataTypeDecimal < DecimalType > > ( element . getDouble ( ) , type - > getScale ( ) ) ;
assert_cast < ColumnDecimal < DecimalType > & > ( dest ) . insert ( result ) ;
return true ;
}
private :
DataTypePtr data_type ;
} ;
class StringNode : public Node
{
public :
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
2021-10-20 03:52:48 +00:00
return JSONExtractStringImpl < JSONParser > : : insertResultToColumn ( dest , element , { } ) ;
2021-10-14 20:10:53 +00:00
}
} ;
/// Extracts a FixedString from a JSON string. Strings longer than N of the column are rejected.
class FixedStringNode : public Node
{
public:
    bool insertResultToColumn(IColumn & dest, const Element & element) override
    {
        if (element.isString())
        {
            auto & fixed_column = assert_cast<ColumnFixedString &>(dest);
            auto str = element.getString();
            if (str.size() <= fixed_column.getN())
            {
                fixed_column.insertData(str.data(), str.size());
                return true;
            }
        }
        return false;
    }
};
template < typename Type >
class EnumNode : public Node
{
public :
2021-10-15 07:05:39 +00:00
explicit EnumNode ( const std : : vector < std : : pair < String , Type > > & name_value_pairs_ ) : name_value_pairs ( name_value_pairs_ )
2021-10-14 20:10:53 +00:00
{
for ( const auto & name_value_pair : name_value_pairs )
{
name_to_value_map . emplace ( name_value_pair . first , name_value_pair . second ) ;
only_values . emplace ( name_value_pair . second ) ;
}
}
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
auto & col_vec = assert_cast < ColumnVector < Type > & > ( dest ) ;
if ( element . isInt64 ( ) )
{
Type value ;
2022-04-18 10:18:43 +00:00
if ( ! accurate : : convertNumeric ( element . getInt64 ( ) , value ) | | ! only_values . contains ( value ) )
2021-10-14 20:10:53 +00:00
return false ;
col_vec . insertValue ( value ) ;
return true ;
}
if ( element . isUInt64 ( ) )
{
Type value ;
2022-04-18 10:18:43 +00:00
if ( ! accurate : : convertNumeric ( element . getUInt64 ( ) , value ) | | ! only_values . contains ( value ) )
2021-10-14 20:10:53 +00:00
return false ;
col_vec . insertValue ( value ) ;
return true ;
}
if ( element . isString ( ) )
{
auto value = name_to_value_map . find ( element . getString ( ) ) ;
if ( value = = name_to_value_map . end ( ) )
return false ;
col_vec . insertValue ( value - > second ) ;
return true ;
}
return false ;
}
private :
std : : vector < std : : pair < String , Type > > name_value_pairs ;
std : : unordered_map < std : : string_view , Type > name_to_value_map ;
std : : unordered_set < Type > only_values ;
} ;
class NullableNode : public Node
{
public :
2021-10-15 07:05:39 +00:00
explicit NullableNode ( std : : unique_ptr < Node > nested_ ) : nested ( std : : move ( nested_ ) ) { }
2021-10-14 20:10:53 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
ColumnNullable & col_null = assert_cast < ColumnNullable & > ( dest ) ;
if ( ! nested - > insertResultToColumn ( col_null . getNestedColumn ( ) , element ) )
return false ;
col_null . getNullMapColumn ( ) . insertValue ( 0 ) ;
return true ;
}
private :
std : : unique_ptr < Node > nested ;
} ;
class ArrayNode : public Node
{
public :
2021-10-15 07:05:39 +00:00
explicit ArrayNode ( std : : unique_ptr < Node > nested_ ) : nested ( std : : move ( nested_ ) ) { }
2021-10-14 20:10:53 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
if ( ! element . isArray ( ) )
return false ;
auto array = element . getArray ( ) ;
ColumnArray & col_arr = assert_cast < ColumnArray & > ( dest ) ;
auto & data = col_arr . getData ( ) ;
size_t old_size = data . size ( ) ;
bool were_valid_elements = false ;
for ( auto value : array )
{
if ( nested - > insertResultToColumn ( data , value ) )
were_valid_elements = true ;
else
data . insertDefault ( ) ;
}
if ( ! were_valid_elements )
{
data . popBack ( data . size ( ) - old_size ) ;
return false ;
}
col_arr . getOffsets ( ) . push_back ( data . size ( ) ) ;
return true ;
}
private :
std : : unique_ptr < Node > nested ;
} ;
class TupleNode : public Node
{
public :
TupleNode ( std : : vector < std : : unique_ptr < Node > > nested_ , const std : : vector < String > & explicit_names_ ) : nested ( std : : move ( nested_ ) ) , explicit_names ( explicit_names_ )
{
for ( size_t i = 0 ; i ! = explicit_names . size ( ) ; + + i )
name_to_index_map . emplace ( explicit_names [ i ] , i ) ;
}
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
{
ColumnTuple & tuple = assert_cast < ColumnTuple & > ( dest ) ;
size_t old_size = dest . size ( ) ;
bool were_valid_elements = false ;
auto set_size = [ & ] ( size_t size )
{
for ( size_t i = 0 ; i ! = tuple . tupleSize ( ) ; + + i )
{
auto & col = tuple . getColumn ( i ) ;
if ( col . size ( ) ! = size )
{
if ( col . size ( ) > size )
col . popBack ( col . size ( ) - size ) ;
else
while ( col . size ( ) < size )
col . insertDefault ( ) ;
}
}
} ;
if ( element . isArray ( ) )
{
auto array = element . getArray ( ) ;
auto it = array . begin ( ) ;
for ( size_t index = 0 ; ( index ! = nested . size ( ) ) & & ( it ! = array . end ( ) ) ; + + index )
{
if ( nested [ index ] - > insertResultToColumn ( tuple . getColumn ( index ) , * it + + ) )
were_valid_elements = true ;
else
tuple . getColumn ( index ) . insertDefault ( ) ;
}
set_size ( old_size + static_cast < size_t > ( were_valid_elements ) ) ;
return were_valid_elements ;
}
if ( element . isObject ( ) )
{
auto object = element . getObject ( ) ;
if ( name_to_index_map . empty ( ) )
{
auto it = object . begin ( ) ;
for ( size_t index = 0 ; ( index ! = nested . size ( ) ) & & ( it ! = object . end ( ) ) ; + + index )
{
if ( nested [ index ] - > insertResultToColumn ( tuple . getColumn ( index ) , ( * it + + ) . second ) )
were_valid_elements = true ;
else
tuple . getColumn ( index ) . insertDefault ( ) ;
}
}
else
{
for ( const auto & [ key , value ] : object )
{
auto index = name_to_index_map . find ( key ) ;
if ( index ! = name_to_index_map . end ( ) )
{
if ( nested [ index - > second ] - > insertResultToColumn ( tuple . getColumn ( index - > second ) , value ) )
were_valid_elements = true ;
}
}
}
set_size ( old_size + static_cast < size_t > ( were_valid_elements ) ) ;
return were_valid_elements ;
}
return false ;
}
private :
std : : vector < std : : unique_ptr < Node > > nested ;
std : : vector < String > explicit_names ;
std : : unordered_map < std : : string_view , size_t > name_to_index_map ;
} ;
static std : : unique_ptr < Node > build ( const char * function_name , const DataTypePtr & type )
{
switch ( type - > getTypeId ( ) )
{
case TypeIndex : : UInt8 : return std : : make_unique < NumericNode < UInt8 > > ( ) ;
case TypeIndex : : UInt16 : return std : : make_unique < NumericNode < UInt16 > > ( ) ;
case TypeIndex : : UInt32 : return std : : make_unique < NumericNode < UInt32 > > ( ) ;
case TypeIndex : : UInt64 : return std : : make_unique < NumericNode < UInt64 > > ( ) ;
case TypeIndex : : Int8 : return std : : make_unique < NumericNode < Int8 > > ( ) ;
case TypeIndex : : Int16 : return std : : make_unique < NumericNode < Int16 > > ( ) ;
case TypeIndex : : Int32 : return std : : make_unique < NumericNode < Int32 > > ( ) ;
case TypeIndex : : Int64 : return std : : make_unique < NumericNode < Int64 > > ( ) ;
case TypeIndex : : Float32 : return std : : make_unique < NumericNode < Float32 > > ( ) ;
case TypeIndex : : Float64 : return std : : make_unique < NumericNode < Float64 > > ( ) ;
case TypeIndex : : String : return std : : make_unique < StringNode > ( ) ;
case TypeIndex : : FixedString : return std : : make_unique < FixedStringNode > ( ) ;
case TypeIndex : : UUID : return std : : make_unique < UUIDNode > ( ) ;
case TypeIndex : : LowCardinality :
{
auto dictionary_type = typeid_cast < const DataTypeLowCardinality * > ( type . get ( ) ) - > getDictionaryType ( ) ;
auto impl = build ( function_name , dictionary_type ) ;
return std : : make_unique < LowCardinalityNode > ( dictionary_type , std : : move ( impl ) ) ;
}
case TypeIndex : : Decimal256 : return std : : make_unique < DecimalNode < Decimal256 > > ( type ) ;
case TypeIndex : : Decimal128 : return std : : make_unique < DecimalNode < Decimal128 > > ( type ) ;
case TypeIndex : : Decimal64 : return std : : make_unique < DecimalNode < Decimal64 > > ( type ) ;
case TypeIndex : : Decimal32 : return std : : make_unique < DecimalNode < Decimal32 > > ( type ) ;
case TypeIndex : : Enum8 :
return std : : make_unique < EnumNode < Int8 > > ( static_cast < const DataTypeEnum8 & > ( * type ) . getValues ( ) ) ;
case TypeIndex : : Enum16 :
return std : : make_unique < EnumNode < Int16 > > ( static_cast < const DataTypeEnum16 & > ( * type ) . getValues ( ) ) ;
case TypeIndex : : Nullable :
{
return std : : make_unique < NullableNode > ( build ( function_name , static_cast < const DataTypeNullable & > ( * type ) . getNestedType ( ) ) ) ;
}
case TypeIndex : : Array :
{
return std : : make_unique < ArrayNode > ( build ( function_name , static_cast < const DataTypeArray & > ( * type ) . getNestedType ( ) ) ) ;
}
case TypeIndex : : Tuple :
{
const auto & tuple = static_cast < const DataTypeTuple & > ( * type ) ;
const auto & tuple_elements = tuple . getElements ( ) ;
std : : vector < std : : unique_ptr < Node > > elements ;
2021-10-15 07:05:39 +00:00
elements . reserve ( tuple_elements . size ( ) ) ;
2021-10-14 20:10:53 +00:00
for ( const auto & tuple_element : tuple_elements )
elements . emplace_back ( build ( function_name , tuple_element ) ) ;
return std : : make_unique < TupleNode > ( std : : move ( elements ) , tuple . haveExplicitNames ( ) ? tuple . getElementNames ( ) : Strings { } ) ;
}
default :
throw Exception { " Function " + String ( function_name ) + " doesn't support the return type schema: " + type - > getName ( ) , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
2020-07-11 21:04:22 +00:00
}
}
2021-10-14 20:10:53 +00:00
} ;
2020-07-11 21:04:22 +00:00
2021-10-14 20:10:53 +00:00
template < typename JSONParser >
class JSONExtractImpl
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * function_name , const ColumnsWithTypeAndName & arguments )
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
if ( arguments . size ( ) < 2 )
throw Exception { " Function " + String ( function_name ) + " requires at least two arguments " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
const auto & col = arguments . back ( ) ;
2021-10-15 07:05:39 +00:00
const auto * col_type_const = typeid_cast < const ColumnConst * > ( col . column . get ( ) ) ;
2021-10-14 20:10:53 +00:00
if ( ! col_type_const | | ! isString ( col . type ) )
throw Exception { " The last argument of function " + String ( function_name )
+ " should be a constant string specifying the return data type, illegal value: " + col . name ,
ErrorCodes : : ILLEGAL_COLUMN } ;
return DataTypeFactory : : instance ( ) . get ( col_type_const - > getValue < String > ( ) ) ;
2020-07-11 21:04:22 +00:00
}
2021-10-14 20:10:53 +00:00
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 2 ; }
void prepare ( const char * function_name , const ColumnsWithTypeAndName & , const DataTypePtr & result_type )
{
extract_tree = JSONExtractTree < JSONParser > : : build ( function_name , result_type ) ;
}
bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
return extract_tree - > insertResultToColumn ( dest , element ) ;
}
protected :
std : : unique_ptr < typename JSONExtractTree < JSONParser > : : Node > extract_tree ;
} ;
template < typename JSONParser >
class JSONExtractKeysAndValuesImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * function_name , const ColumnsWithTypeAndName & arguments )
{
if ( arguments . size ( ) < 2 )
throw Exception { " Function " + String ( function_name ) + " requires at least two arguments " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
const auto & col = arguments . back ( ) ;
2021-10-15 07:05:39 +00:00
const auto * col_type_const = typeid_cast < const ColumnConst * > ( col . column . get ( ) ) ;
2021-10-14 20:10:53 +00:00
if ( ! col_type_const | | ! isString ( col . type ) )
throw Exception { " The last argument of function " + String ( function_name )
+ " should be a constant string specifying the values' data type, illegal value: " + col . name ,
ErrorCodes : : ILLEGAL_COLUMN } ;
DataTypePtr key_type = std : : make_unique < DataTypeString > ( ) ;
DataTypePtr value_type = DataTypeFactory : : instance ( ) . get ( col_type_const - > getValue < String > ( ) ) ;
DataTypePtr tuple_type = std : : make_unique < DataTypeTuple > ( DataTypes { key_type , value_type } ) ;
return std : : make_unique < DataTypeArray > ( tuple_type ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 2 ; }
void prepare ( const char * function_name , const ColumnsWithTypeAndName & , const DataTypePtr & result_type )
{
const auto tuple_type = typeid_cast < const DataTypeArray * > ( result_type . get ( ) ) - > getNestedType ( ) ;
const auto value_type = typeid_cast < const DataTypeTuple * > ( tuple_type . get ( ) ) - > getElements ( ) [ 1 ] ;
extract_tree = JSONExtractTree < JSONParser > : : build ( function_name , value_type ) ;
}
bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
if ( ! element . isObject ( ) )
return false ;
auto object = element . getObject ( ) ;
auto & col_arr = assert_cast < ColumnArray & > ( dest ) ;
auto & col_tuple = assert_cast < ColumnTuple & > ( col_arr . getData ( ) ) ;
size_t old_size = col_tuple . size ( ) ;
auto & col_key = assert_cast < ColumnString & > ( col_tuple . getColumn ( 0 ) ) ;
auto & col_value = col_tuple . getColumn ( 1 ) ;
for ( const auto & [ key , value ] : object )
{
if ( extract_tree - > insertResultToColumn ( col_value , value ) )
col_key . insertData ( key . data ( ) , key . size ( ) ) ;
}
if ( col_tuple . size ( ) = = old_size )
return false ;
col_arr . getOffsets ( ) . push_back ( col_tuple . size ( ) ) ;
return true ;
}
private :
std : : unique_ptr < typename JSONExtractTree < JSONParser > : : Node > extract_tree ;
} ;
/// Implementation that writes the selected JSON element back as JSON text
/// and appends it as one row of a String column.
template < typename JSONParser >
class JSONExtractRawImpl
{
public :
using Element = typename JSONParser : : Element ;
/// The result type is always String; both parameters are ignored.
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
return std : : make_shared < DataTypeString > ( ) ;
}
/// All arguments except one are reported as index arguments.
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
/// Serializes `element` straight into the character buffer of the ColumnString,
/// then appends a zero byte and records the new end offset. Always returns true.
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
ColumnString & col_str = assert_cast < ColumnString & > ( dest ) ;
auto & chars = col_str . getChars ( ) ;
2021-12-13 04:57:54 +00:00
/// The buffer is constructed with AppendModeTag, on top of the existing `chars`.
WriteBufferFromVector < ColumnString : : Chars > buf ( chars , AppendModeTag ( ) ) ;
2021-10-14 20:10:53 +00:00
traverse ( element , buf ) ;
buf . finalize ( ) ;
chars . push_back ( 0 ) ;
col_str . getOffsets ( ) . push_back ( chars . size ( ) ) ;
return true ;
}
private :
/// Recursively writes `element` to `buf`. The type checks are tried in the order below
/// and the first match wins; an element matching none of them writes nothing.
static void traverse ( const Element & element , WriteBuffer & buf )
{
if ( element . isInt64 ( ) )
{
writeIntText ( element . getInt64 ( ) , buf ) ;
return ;
}
if ( element . isUInt64 ( ) )
{
writeIntText ( element . getUInt64 ( ) , buf ) ;
return ;
}
if ( element . isDouble ( ) )
{
writeFloatText ( element . getDouble ( ) , buf ) ;
return ;
}
if ( element . isBool ( ) )
{
if ( element . getBool ( ) )
writeCString ( " true " , buf ) ;
2020-07-11 21:04:22 +00:00
else
2021-10-14 20:10:53 +00:00
writeCString ( " false " , buf ) ;
return ;
}
if ( element . isString ( ) )
{
2021-10-15 07:05:39 +00:00
writeJSONString ( element . getString ( ) , buf , formatSettings ( ) ) ;
2021-10-14 20:10:53 +00:00
return ;
}
if ( element . isArray ( ) )
{
writeChar ( ' [ ' , buf ) ;
/// std::exchange returns the previous flag value, so no comma precedes the first item.
bool need_comma = false ;
for ( auto value : element . getArray ( ) )
{
if ( std : : exchange ( need_comma , true ) )
writeChar ( ' , ' , buf ) ;
traverse ( value , buf ) ;
}
writeChar ( ' ] ' , buf ) ;
return ;
}
if ( element . isObject ( ) )
{
writeChar ( ' { ' , buf ) ;
/// Same separator handling as for arrays.
bool need_comma = false ;
for ( auto [ key , value ] : element . getObject ( ) )
{
if ( std : : exchange ( need_comma , true ) )
writeChar ( ' , ' , buf ) ;
2021-10-15 07:05:39 +00:00
writeJSONString ( key , buf , formatSettings ( ) ) ;
2021-10-14 20:10:53 +00:00
writeChar ( ' : ' , buf ) ;
traverse ( value , buf ) ;
}
writeChar ( ' } ' , buf ) ;
return ;
}
if ( element . isNull ( ) )
{
writeCString ( " null " , buf ) ;
return ;
2020-07-11 21:04:22 +00:00
}
}
2021-10-15 07:05:39 +00:00
/// Format settings passed to writeJSONString: a default-constructed FormatSettings with
/// json.escape_forward_slashes set to false, created on first use and shared afterwards.
static const FormatSettings & formatSettings ( )
2021-10-14 20:10:53 +00:00
{
static const FormatSettings the_instance = [ ]
{
FormatSettings settings ;
settings . json . escape_forward_slashes = false ;
return settings ;
} ( ) ;
return the_instance ;
}
} ;
template < typename JSONParser >
class JSONExtractArrayRawImpl
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2020-07-11 21:04:22 +00:00
{
2021-10-14 20:10:53 +00:00
return std : : make_shared < DataTypeArray > ( std : : make_shared < DataTypeString > ( ) ) ;
2020-07-11 21:04:22 +00:00
}
2021-10-14 20:10:53 +00:00
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
if ( ! element . isArray ( ) )
return false ;
auto array = element . getArray ( ) ;
ColumnArray & col_res = assert_cast < ColumnArray & > ( dest ) ;
for ( auto value : array )
JSONExtractRawImpl < JSONParser > : : insertResultToColumn ( col_res . getData ( ) , value , { } ) ;
col_res . getOffsets ( ) . push_back ( col_res . getOffsets ( ) . back ( ) + array . size ( ) ) ;
return true ;
}
} ;
template < typename JSONParser >
class JSONExtractKeysAndValuesRawImpl
{
public :
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
{
DataTypePtr string_type = std : : make_unique < DataTypeString > ( ) ;
DataTypePtr tuple_type = std : : make_unique < DataTypeTuple > ( DataTypes { string_type , string_type } ) ;
return std : : make_unique < DataTypeArray > ( tuple_type ) ;
}
static size_t getNumberOfIndexArguments ( const ColumnsWithTypeAndName & arguments ) { return arguments . size ( ) - 1 ; }
bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
{
if ( ! element . isObject ( ) )
return false ;
auto object = element . getObject ( ) ;
auto & col_arr = assert_cast < ColumnArray & > ( dest ) ;
auto & col_tuple = assert_cast < ColumnTuple & > ( col_arr . getData ( ) ) ;
auto & col_key = assert_cast < ColumnString & > ( col_tuple . getColumn ( 0 ) ) ;
auto & col_value = assert_cast < ColumnString & > ( col_tuple . getColumn ( 1 ) ) ;
for ( const auto & [ key , value ] : object )
{
col_key . insertData ( key . data ( ) , key . size ( ) ) ;
JSONExtractRawImpl < JSONParser > : : insertResultToColumn ( col_value , value , { } ) ;
}
col_arr . getOffsets ( ) . push_back ( col_arr . getOffsets ( ) . back ( ) + object . size ( ) ) ;
return true ;
}
} ;
/// Implementation that returns the keys of a JSON object as Array(String).
template <typename JSONParser>
class JSONExtractKeysImpl
{
public:
    using Element = typename JSONParser::Element;

    /// The result type is always Array(String); both parameters are ignored.
    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &)
    {
        /// make_shared for both levels (the original mixed make_unique and make_shared):
        /// DataTypePtr is a shared pointer, and converting from make_unique needs
        /// a second allocation for the control block.
        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
    }

    /// All arguments except one are reported as index arguments.
    static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

    /// Appends one array row with the key of every object member; the values are not inspected.
    /// Returns false if the element is not an object.
    bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
    {
        if (!element.isObject())
            return false;

        auto object = element.getObject();

        ColumnArray & col_res = assert_cast<ColumnArray &>(dest);
        auto & col_key = assert_cast<ColumnString &>(col_res.getData());

        for (const auto & [key, value] : object)
        {
            col_key.insertData(key.data(), key.size());
        }

        /// The new end offset is the previous one plus the number of members.
        col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size());
        return true;
    }
};
2019-05-07 23:31:35 +00:00
2019-03-14 02:55:04 +00:00
/// Registers every JSON function of this file in the factory,
/// each as a JSONOverloadResolver pairing a function name with its implementation.
void registerFunctionsJSON(FunctionFactory & factory)
{
    /// Inspection of the document.
    factory.registerFunction<JSONOverloadResolver<NameJSONHas, JSONHasImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameIsValidJSON, IsValidJSONImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONLength, JSONLengthImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONKey, JSONKeyImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONType, JSONTypeImpl>>();

    /// Extraction with a fixed result type.
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractInt, JSONExtractInt64Impl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractUInt, JSONExtractUInt64Impl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractFloat, JSONExtractFloat64Impl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractBool, JSONExtractBoolImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractString, JSONExtractStringImpl>>();

    /// Extraction with a result type given by the caller.
    factory.registerFunction<JSONOverloadResolver<NameJSONExtract, JSONExtractImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractKeysAndValues, JSONExtractKeysAndValuesImpl>>();

    /// Raw (re-serialized) output and keys.
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractRaw, JSONExtractRawImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractArrayRaw, JSONExtractArrayRawImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractKeysAndValuesRaw, JSONExtractKeysAndValuesRawImpl>>();
    factory.registerFunction<JSONOverloadResolver<NameJSONExtractKeys, JSONExtractKeysImpl>>();
}
2019-05-07 23:31:35 +00:00
2019-03-14 02:55:04 +00:00
}