2019-03-14 02:55:04 +00:00
# pragma once
2019-12-09 13:12:54 +00:00
# include <Functions/IFunctionImpl.h>
2019-10-07 18:56:03 +00:00
# include <Core/AccurateComparison.h>
2019-05-16 19:39:42 +00:00
# include <Functions/DummyJSONParser.h>
# include <Functions/SimdJSONParser.h>
# include <Functions/RapidJSONParser.h>
# include <Common/CpuId.h>
2019-05-13 23:44:55 +00:00
# include <Common/typeid_cast.h>
2019-08-21 02:28:04 +00:00
# include <Common/assert_cast.h>
2019-05-16 19:39:42 +00:00
# include <Core/Settings.h>
2019-03-14 02:55:04 +00:00
# include <Columns/ColumnConst.h>
2019-03-14 08:07:25 +00:00
# include <Columns/ColumnString.h>
2019-05-13 23:44:55 +00:00
# include <Columns/ColumnVector.h>
# include <Columns/ColumnFixedString.h>
# include <Columns/ColumnNullable.h>
# include <Columns/ColumnArray.h>
# include <Columns/ColumnTuple.h>
# include <DataTypes/DataTypesNumber.h>
# include <DataTypes/DataTypeString.h>
# include <DataTypes/DataTypeEnum.h>
2019-03-14 05:48:29 +00:00
# include <DataTypes/DataTypeFactory.h>
2019-05-13 23:44:55 +00:00
# include <DataTypes/DataTypeNullable.h>
# include <DataTypes/DataTypeArray.h>
# include <DataTypes/DataTypeTuple.h>
2019-05-16 19:39:42 +00:00
# include <Interpreters/Context.h>
2019-04-26 21:58:14 +00:00
# include <ext/range.h>
2020-07-11 21:04:22 +00:00
# include <boost/tti/has_member_function.hpp>
2019-04-26 21:58:14 +00:00
2020-04-16 12:31:57 +00:00
# if !defined(ARCADIA_BUILD)
# include "config_functions.h"
# endif
2019-03-14 02:55:04 +00:00
namespace DB
{
namespace ErrorCodes
{
2019-03-14 06:19:21 +00:00
extern const int ILLEGAL_COLUMN ;
2019-03-14 02:55:04 +00:00
extern const int ILLEGAL_TYPE_OF_ARGUMENT ;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH ;
}
2019-05-13 23:44:55 +00:00
/// Functions to parse JSON documents and extract values from them.
/// The first argument of each of these functions is a JSON string;
/// it is followed by any number of arguments specifying the path from the JSON's root to the desired part.
/// For example,
2019-05-15 22:00:05 +00:00
/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100
2020-07-11 21:04:22 +00:00
class FunctionJSONHelpers
2019-03-14 02:55:04 +00:00
{
2019-05-13 23:44:55 +00:00
public :
2020-07-11 21:04:22 +00:00
template < typename Name , template < typename > typename Impl , class JSONParser >
2019-05-16 19:39:42 +00:00
class Executor
{
public :
static void run ( Block & block , const ColumnNumbers & arguments , size_t result_pos , size_t input_rows_count )
{
MutableColumnPtr to { block . getByPosition ( result_pos ) . type - > createColumn ( ) } ;
to - > reserve ( input_rows_count ) ;
if ( arguments . size ( ) < 1 )
throw Exception { " Function " + String ( Name : : name ) + " requires at least one argument " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
const auto & first_column = block . getByPosition ( arguments [ 0 ] ) ;
if ( ! isString ( first_column . type ) )
throw Exception { " The first argument of function " + String ( Name : : name ) + " should be a string containing JSON, illegal type: " + first_column . type - > getName ( ) ,
2019-05-15 22:00:05 +00:00
ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
2019-05-16 19:39:42 +00:00
const ColumnPtr & arg_json = first_column . column ;
auto col_json_const = typeid_cast < const ColumnConst * > ( arg_json . get ( ) ) ;
auto col_json_string
= typeid_cast < const ColumnString * > ( col_json_const ? col_json_const - > getDataColumnPtr ( ) . get ( ) : arg_json . get ( ) ) ;
if ( ! col_json_string )
throw Exception { " Illegal column " + arg_json - > getName ( ) , ErrorCodes : : ILLEGAL_COLUMN } ;
const ColumnString : : Chars & chars = col_json_string - > getChars ( ) ;
const ColumnString : : Offsets & offsets = col_json_string - > getOffsets ( ) ;
2020-07-11 21:04:22 +00:00
size_t num_index_arguments = Impl < JSONParser > : : getNumberOfIndexArguments ( block , arguments ) ;
std : : vector < Move > moves = prepareMoves ( Name : : name , block , arguments , 1 , num_index_arguments ) ;
2019-05-15 22:00:05 +00:00
2019-05-16 19:39:42 +00:00
/// Preallocate memory in parser if necessary.
JSONParser parser ;
2020-07-11 21:04:22 +00:00
if constexpr ( has_member_function_reserve < void ( JSONParser : : * ) ( size_t ) > : : value )
{
size_t max_size = calculateMaxSize ( offsets ) ;
if ( max_size )
parser . reserve ( max_size ) ;
}
2019-05-15 22:00:05 +00:00
2019-05-16 19:39:42 +00:00
Impl < JSONParser > impl ;
2019-03-14 02:55:04 +00:00
2019-05-16 19:39:42 +00:00
/// prepare() does Impl-specific preparation before handling each row.
2020-07-11 21:04:22 +00:00
if constexpr ( has_member_function_prepare < void ( Impl < JSONParser > : : * ) ( const char * , const Block & , const ColumnNumbers & , size_t ) > : : value )
impl . prepare ( Name : : name , block , arguments , result_pos ) ;
2019-05-13 23:44:55 +00:00
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
Element document ;
bool document_ok = false ;
2019-05-27 18:46:55 +00:00
if ( col_json_const )
{
2020-07-11 21:04:22 +00:00
std : : string_view json { reinterpret_cast < const char * > ( & chars [ 0 ] ) , offsets [ 0 ] - 1 } ;
document_ok = parser . parse ( json , document ) ;
2019-05-27 18:46:55 +00:00
}
2019-05-16 19:39:42 +00:00
for ( const auto i : ext : : range ( 0 , input_rows_count ) )
2019-05-13 23:44:55 +00:00
{
2019-05-27 18:46:55 +00:00
if ( ! col_json_const )
{
2020-07-11 21:04:22 +00:00
std : : string_view json { reinterpret_cast < const char * > ( & chars [ offsets [ i - 1 ] ] ) , offsets [ i ] - offsets [ i - 1 ] - 1 } ;
document_ok = parser . parse ( json , document ) ;
2019-05-27 18:46:55 +00:00
}
2019-05-13 23:44:55 +00:00
2020-07-11 21:04:22 +00:00
bool added_to_column = false ;
if ( document_ok )
2019-03-14 02:55:04 +00:00
{
2019-05-16 19:39:42 +00:00
/// Perform moves.
2020-07-11 21:04:22 +00:00
Element element ;
std : : string_view last_key ;
bool moves_ok = performMoves < JSONParser > ( block , arguments , i , document , moves , element , last_key ) ;
2019-05-16 19:39:42 +00:00
2020-07-11 21:04:22 +00:00
if ( moves_ok )
added_to_column = impl . insertResultToColumn ( * to , element , last_key ) ;
2019-05-10 08:49:03 +00:00
}
2019-05-16 19:39:42 +00:00
/// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions.
2020-07-11 21:04:22 +00:00
if ( ! added_to_column )
2019-05-16 19:39:42 +00:00
to - > insertDefault ( ) ;
2019-05-16 12:16:21 +00:00
}
2019-05-16 19:39:42 +00:00
block . getByPosition ( result_pos ) . column = std : : move ( to ) ;
2019-05-13 23:44:55 +00:00
}
2020-07-11 21:04:22 +00:00
} ;
2019-03-14 02:55:04 +00:00
2020-07-11 21:04:22 +00:00
private :
BOOST_TTI_HAS_MEMBER_FUNCTION ( reserve )
BOOST_TTI_HAS_MEMBER_FUNCTION ( prepare )
2020-07-20 17:01:58 +00:00
template < class T , class = void >
struct has_index_operator : std : : false_type { } ;
template < class T >
struct has_index_operator < T , std : : void_t < decltype ( std : : declval < T > ( ) [ 0 ] ) > > : std : : true_type { } ;
2020-07-11 21:04:22 +00:00
/// Represents a move of a JSON iterator described by a single argument passed to a JSON function.
/// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1)
/// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}.
/// Keys and indices can be nonconst, in this case they are calculated for each row.
enum class MoveType
{
Key ,
Index ,
ConstKey ,
ConstIndex ,
} ;
2019-05-17 14:21:37 +00:00
2020-07-11 21:04:22 +00:00
struct Move
{
Move ( MoveType type_ , size_t index_ = 0 ) : type ( type_ ) , index ( index_ ) { }
Move ( MoveType type_ , const String & key_ ) : type ( type_ ) , key ( key_ ) { }
MoveType type ;
size_t index = 0 ;
String key ;
} ;
2019-05-13 23:44:55 +00:00
2020-07-11 21:04:22 +00:00
static std : : vector < Move > prepareMoves ( const char * function_name , Block & block , const ColumnNumbers & arguments , size_t first_index_argument , size_t num_index_arguments ) ;
/// Performs moves of types MoveType::Index and MoveType::ConstIndex.
template < typename JSONParser >
static bool performMoves ( const Block & block , const ColumnNumbers & arguments , size_t row ,
const typename JSONParser : : Element & document , const std : : vector < Move > & moves ,
typename JSONParser : : Element & element , std : : string_view & last_key )
{
typename JSONParser : : Element res_element = document ;
std : : string_view key ;
for ( size_t j = 0 ; j ! = moves . size ( ) ; + + j )
2019-05-17 14:21:37 +00:00
{
2020-07-11 21:04:22 +00:00
switch ( moves [ j ] . type )
2019-05-17 14:21:37 +00:00
{
2020-07-11 21:04:22 +00:00
case MoveType : : ConstIndex :
2019-05-17 14:21:37 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! moveToElementByIndex < JSONParser > ( res_element , moves [ j ] . index , key ) )
return false ;
break ;
2019-05-17 14:21:37 +00:00
}
2020-07-11 21:04:22 +00:00
case MoveType : : ConstKey :
2019-05-17 14:21:37 +00:00
{
2020-07-11 21:04:22 +00:00
key = moves [ j ] . key ;
if ( ! moveToElementByKey < JSONParser > ( res_element , key ) )
return false ;
break ;
}
case MoveType : : Index :
{
Int64 index = ( * block . getByPosition ( arguments [ j + 1 ] ) . column ) [ row ] . get < Int64 > ( ) ;
if ( ! moveToElementByIndex < JSONParser > ( res_element , index , key ) )
return false ;
break ;
}
case MoveType : : Key :
{
key = std : : string_view { ( * block . getByPosition ( arguments [ j + 1 ] ) . column ) . getDataAt ( row ) } ;
if ( ! moveToElementByKey < JSONParser > ( res_element , key ) )
return false ;
break ;
2019-05-17 14:21:37 +00:00
}
}
}
2020-07-11 21:04:22 +00:00
element = res_element ;
last_key = key ;
return true ;
}
2019-05-15 22:00:05 +00:00
2020-07-11 21:04:22 +00:00
template < typename JSONParser >
static bool moveToElementByIndex ( typename JSONParser : : Element & element , int index , std : : string_view & out_key )
{
if ( element . isArray ( ) )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
auto array = element . getArray ( ) ;
if ( index > = 0 )
- - index ;
2020-07-20 17:01:58 +00:00
else
index + = array . size ( ) ;
if ( static_cast < size_t > ( index ) > = array . size ( ) )
2020-07-11 21:04:22 +00:00
return false ;
element = array [ index ] ;
out_key = { } ;
return true ;
}
2020-07-20 17:01:58 +00:00
if constexpr ( has_index_operator < typename JSONParser : : Object > : : value )
2020-07-11 21:04:22 +00:00
{
2020-07-20 17:01:58 +00:00
if ( element . isObject ( ) )
2019-05-13 23:44:55 +00:00
{
2020-07-20 17:01:58 +00:00
auto object = element . getObject ( ) ;
if ( index > = 0 )
- - index ;
else
index + = object . size ( ) ;
2020-07-11 21:04:22 +00:00
if ( static_cast < size_t > ( index ) > = object . size ( ) )
return false ;
std : : tie ( out_key , element ) = object [ index ] ;
return true ;
2019-05-13 23:44:55 +00:00
}
}
2019-03-14 02:55:04 +00:00
2020-07-11 21:04:22 +00:00
return { } ;
}
/// Performs moves of types MoveType::Key and MoveType::ConstKey.
template < typename JSONParser >
static bool moveToElementByKey ( typename JSONParser : : Element & element , const std : : string_view & key )
{
if ( ! element . isObject ( ) )
2019-05-16 19:39:42 +00:00
return false ;
2020-07-11 21:04:22 +00:00
auto object = element . getObject ( ) ;
return object . find ( key , element ) ;
}
static size_t calculateMaxSize ( const ColumnString : : Offsets & offsets ) ;
} ;
template < typename Name , template < typename > typename Impl >
class FunctionJSON : public IFunction
{
public :
static FunctionPtr create ( const Context & context_ ) { return std : : make_shared < FunctionJSON > ( context_ ) ; }
FunctionJSON ( const Context & context_ ) : context ( context_ ) { }
2019-05-17 14:21:37 +00:00
2020-07-11 21:04:22 +00:00
static constexpr auto name = Name : : name ;
String getName ( ) const override { return Name : : name ; }
bool isVariadic ( ) const override { return true ; }
size_t getNumberOfArguments ( ) const override { return 0 ; }
bool useDefaultImplementationForConstants ( ) const override { return false ; }
DataTypePtr getReturnTypeImpl ( const ColumnsWithTypeAndName & arguments ) const override
{
return Impl < DummyJSONParser > : : getReturnType ( Name : : name , arguments ) ;
}
2020-07-21 13:58:07 +00:00
void executeImpl ( Block & block , const ColumnNumbers & arguments , size_t result_pos , size_t input_rows_count ) const override
2020-07-11 21:04:22 +00:00
{
/// Choose JSONParser.
# if USE_SIMDJSON
if ( context . getSettingsRef ( ) . allow_simdjson )
2019-05-17 14:21:37 +00:00
{
2020-07-11 21:04:22 +00:00
FunctionJSONHelpers : : Executor < Name , Impl , SimdJSONParser > : : run ( block , arguments , result_pos , input_rows_count ) ;
return ;
2019-05-17 14:21:37 +00:00
}
2020-07-11 21:04:22 +00:00
# endif
# if USE_RAPIDJSON
FunctionJSONHelpers : : Executor < Name , Impl , RapidJSONParser > : : run ( block , arguments , result_pos , input_rows_count ) ;
# else
FunctionJSONHelpers : : Executor < Name , Impl , DummyJSONParser > : : run ( block , arguments , result_pos , input_rows_count ) ;
# endif
}
private :
const Context & context ;
2019-05-13 23:44:55 +00:00
} ;
2019-03-14 02:55:04 +00:00
2019-05-13 23:44:55 +00:00
/// Name tags: each struct carries the name of one JSON function as a compile-time C string.
/// NOTE(review): the literals are reproduced exactly as found in this copy of the file,
/// including their padding spaces — confirm against the canonical source.
struct NameJSONHas
{
    static constexpr const char * name = " JSONHas ";
};

struct NameIsValidJSON
{
    static constexpr const char * name = " isValidJSON ";
};

struct NameJSONLength
{
    static constexpr const char * name = " JSONLength ";
};

struct NameJSONKey
{
    static constexpr const char * name = " JSONKey ";
};

struct NameJSONType
{
    static constexpr const char * name = " JSONType ";
};

struct NameJSONExtractInt
{
    static constexpr const char * name = " JSONExtractInt ";
};

struct NameJSONExtractUInt
{
    static constexpr const char * name = " JSONExtractUInt ";
};

struct NameJSONExtractFloat
{
    static constexpr const char * name = " JSONExtractFloat ";
};

struct NameJSONExtractBool
{
    static constexpr const char * name = " JSONExtractBool ";
};

struct NameJSONExtractString
{
    static constexpr const char * name = " JSONExtractString ";
};

struct NameJSONExtract
{
    static constexpr const char * name = " JSONExtract ";
};

struct NameJSONExtractKeysAndValues
{
    static constexpr const char * name = " JSONExtractKeysAndValues ";
};

struct NameJSONExtractRaw
{
    static constexpr const char * name = " JSONExtractRaw ";
};

struct NameJSONExtractArrayRaw
{
    static constexpr const char * name = " JSONExtractArrayRaw ";
};

struct NameJSONExtractKeysAndValuesRaw
{
    static constexpr const char * name = " JSONExtractKeysAndValuesRaw ";
};
2019-05-13 23:44:55 +00:00
2019-10-19 07:26:19 +00:00
template < typename JSONParser >
class JSONHasImpl
2019-05-13 23:44:55 +00:00
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & ) { return std : : make_shared < DataTypeUInt8 > ( ) ; }
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
2019-05-13 23:44:55 +00:00
2020-07-11 21:04:22 +00:00
static bool insertResultToColumn ( IColumn & dest , const Element & , const std : : string_view & )
2019-05-13 23:44:55 +00:00
{
2019-08-21 02:28:04 +00:00
ColumnVector < UInt8 > & col_vec = assert_cast < ColumnVector < UInt8 > & > ( dest ) ;
2019-05-13 23:44:55 +00:00
col_vec . insertValue ( 1 ) ;
2019-03-14 02:55:04 +00:00
return true ;
}
2019-05-13 23:44:55 +00:00
} ;
2019-10-19 07:26:19 +00:00
2019-10-12 11:42:43 +00:00
template < typename JSONParser >
2019-10-19 07:26:19 +00:00
class IsValidJSONImpl
{
2019-10-21 13:39:55 +00:00
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * function_name , const ColumnsWithTypeAndName & arguments )
2019-10-19 07:26:19 +00:00
{
if ( arguments . size ( ) ! = 1 )
2019-10-21 13:39:55 +00:00
{
/// IsValidJSON() shouldn't get parameters other than JSON.
2019-10-19 07:26:19 +00:00
throw Exception { " Function " + String ( function_name ) + " needs exactly one argument " ,
ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
2019-10-21 13:39:55 +00:00
}
2019-10-19 07:26:19 +00:00
return std : : make_shared < DataTypeUInt8 > ( ) ;
}
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & ) { return 0 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & , const std : : string_view & )
2019-10-19 07:26:19 +00:00
{
2019-10-21 13:39:55 +00:00
/// This function is called only if JSON is valid.
/// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function.
ColumnVector < UInt8 > & col_vec = assert_cast < ColumnVector < UInt8 > & > ( dest ) ;
col_vec . insertValue ( 1 ) ;
return true ;
2019-10-19 07:26:19 +00:00
}
} ;
2019-05-13 23:44:55 +00:00
template < typename JSONParser >
class JSONLengthImpl
{
2019-03-14 02:55:04 +00:00
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-13 23:44:55 +00:00
{
return std : : make_shared < DataTypeUInt64 > ( ) ;
}
2019-03-14 02:55:04 +00:00
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-13 23:44:55 +00:00
{
size_t size ;
2020-07-11 21:04:22 +00:00
if ( element . isArray ( ) )
size = element . getArray ( ) . size ( ) ;
else if ( element . isObject ( ) )
size = element . getObject ( ) . size ( ) ;
2019-05-13 23:44:55 +00:00
else
return false ;
2019-03-14 02:55:04 +00:00
2019-08-21 02:28:04 +00:00
ColumnVector < UInt64 > & col_vec = assert_cast < ColumnVector < UInt64 > & > ( dest ) ;
2019-05-13 23:44:55 +00:00
col_vec . insertValue ( size ) ;
return true ;
}
} ;
2019-03-14 02:55:04 +00:00
2019-05-13 23:44:55 +00:00
template < typename JSONParser >
class JSONKeyImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-13 23:44:55 +00:00
{
return std : : make_shared < DataTypeString > ( ) ;
}
2019-03-14 02:55:04 +00:00
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & , const std : : string_view & last_key )
2019-03-14 02:55:04 +00:00
{
2020-07-11 21:04:22 +00:00
if ( last_key . empty ( ) )
2019-05-13 23:44:55 +00:00
return false ;
2019-08-21 02:28:04 +00:00
ColumnString & col_str = assert_cast < ColumnString & > ( dest ) ;
2020-07-11 21:04:22 +00:00
col_str . insertData ( last_key . data ( ) , last_key . size ( ) ) ;
2019-05-13 23:44:55 +00:00
return true ;
}
} ;
2019-03-14 05:48:29 +00:00
2019-03-14 08:30:15 +00:00
2019-05-13 23:44:55 +00:00
template < typename JSONParser >
class JSONTypeImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-13 23:44:55 +00:00
{
static const std : : vector < std : : pair < String , Int8 > > values = {
{ " Array " , ' [ ' } ,
{ " Object " , ' { ' } ,
{ " String " , ' " ' } ,
2019-05-16 18:22:59 +00:00
{ " Int64 " , ' i ' } ,
{ " UInt64 " , ' u ' } ,
{ " Double " , ' d ' } ,
2019-05-13 23:44:55 +00:00
{ " Bool " , ' b ' } ,
2019-05-15 22:00:05 +00:00
{ " Null " , 0 } , /// the default value for the column.
2019-05-13 23:44:55 +00:00
} ;
return std : : make_shared < DataTypeEnum < Int8 > > ( values ) ;
}
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-13 23:44:55 +00:00
{
UInt8 type ;
2020-07-11 21:04:22 +00:00
if ( element . isInt64 ( ) )
2019-05-16 18:22:59 +00:00
type = ' i ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isUInt64 ( ) )
2019-05-16 18:22:59 +00:00
type = ' u ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isDouble ( ) )
2019-05-13 23:44:55 +00:00
type = ' d ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isBool ( ) )
2019-05-13 23:44:55 +00:00
type = ' b ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isString ( ) )
2019-05-13 23:44:55 +00:00
type = ' " ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isArray ( ) )
2019-05-13 23:44:55 +00:00
type = ' [ ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isObject ( ) )
2019-05-13 23:44:55 +00:00
type = ' { ' ;
2020-07-11 21:04:22 +00:00
else if ( element . isNull ( ) )
2019-05-13 23:44:55 +00:00
type = 0 ;
2019-03-14 02:55:04 +00:00
else
2019-05-13 23:44:55 +00:00
return false ;
2019-08-21 02:28:04 +00:00
ColumnVector < Int8 > & col_vec = assert_cast < ColumnVector < Int8 > & > ( dest ) ;
2019-05-13 23:44:55 +00:00
col_vec . insertValue ( type ) ;
return true ;
}
} ;
template < typename JSONParser , typename NumberType , bool convert_bool_to_integer = false >
class JSONExtractNumericImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-13 23:44:55 +00:00
{
return std : : make_shared < DataTypeNumber < NumberType > > ( ) ;
}
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-13 23:44:55 +00:00
{
NumberType value ;
2020-07-11 21:04:22 +00:00
if ( element . isInt64 ( ) )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! accurate : : convertNumeric ( element . getInt64 ( ) , value ) )
2019-05-13 23:44:55 +00:00
return false ;
}
2020-07-11 21:04:22 +00:00
else if ( element . isUInt64 ( ) )
2019-03-14 02:55:04 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! accurate : : convertNumeric ( element . getUInt64 ( ) , value ) )
2019-05-16 18:22:59 +00:00
return false ;
}
2020-07-11 21:04:22 +00:00
else if ( element . isDouble ( ) )
2019-05-16 18:22:59 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! accurate : : convertNumeric ( element . getDouble ( ) , value ) )
2019-05-13 23:44:55 +00:00
return false ;
2019-03-14 02:55:04 +00:00
}
2020-08-19 11:52:17 +00:00
else if ( element . isBool ( ) & & is_integer_v < NumberType > & & convert_bool_to_integer )
2020-07-11 21:04:22 +00:00
value = static_cast < NumberType > ( element . getBool ( ) ) ;
2019-05-13 23:44:55 +00:00
else
return false ;
2019-03-14 02:55:04 +00:00
2019-08-21 02:28:04 +00:00
auto & col_vec = assert_cast < ColumnVector < NumberType > & > ( dest ) ;
2019-05-13 23:44:55 +00:00
col_vec . insertValue ( value ) ;
return true ;
}
} ;
/// Aliases of JSONExtractNumericImpl for each supported number type, with the default
/// `convert_bool_to_integer` (false). They take a single template parameter, the parser.

/// Signed integers.
template <typename JSONParser> using JSONExtractInt8Impl = JSONExtractNumericImpl<JSONParser, Int8>;
template <typename JSONParser> using JSONExtractUInt8Impl = JSONExtractNumericImpl<JSONParser, UInt8>;
template <typename JSONParser> using JSONExtractInt16Impl = JSONExtractNumericImpl<JSONParser, Int16>;
template <typename JSONParser> using JSONExtractUInt16Impl = JSONExtractNumericImpl<JSONParser, UInt16>;
template <typename JSONParser> using JSONExtractInt32Impl = JSONExtractNumericImpl<JSONParser, Int32>;
template <typename JSONParser> using JSONExtractUInt32Impl = JSONExtractNumericImpl<JSONParser, UInt32>;
template <typename JSONParser> using JSONExtractInt64Impl = JSONExtractNumericImpl<JSONParser, Int64>;
template <typename JSONParser> using JSONExtractUInt64Impl = JSONExtractNumericImpl<JSONParser, UInt64>;

/// Floating point numbers.
template <typename JSONParser> using JSONExtractFloat32Impl = JSONExtractNumericImpl<JSONParser, Float32>;
template <typename JSONParser> using JSONExtractFloat64Impl = JSONExtractNumericImpl<JSONParser, Float64>;
template < typename JSONParser >
class JSONExtractBoolImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-13 23:44:55 +00:00
{
return std : : make_shared < DataTypeUInt8 > ( ) ;
}
2019-03-14 02:55:04 +00:00
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isBool ( ) )
2019-05-13 23:44:55 +00:00
return false ;
2019-03-14 02:55:04 +00:00
2019-08-21 02:28:04 +00:00
auto & col_vec = assert_cast < ColumnVector < UInt8 > & > ( dest ) ;
2020-07-11 21:04:22 +00:00
col_vec . insertValue ( static_cast < UInt8 > ( element . getBool ( ) ) ) ;
2019-05-13 23:44:55 +00:00
return true ;
}
} ;
template < typename JSONParser >
class JSONExtractStringImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-13 23:44:55 +00:00
{
return std : : make_shared < DataTypeString > ( ) ;
}
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isString ( ) )
2019-05-13 23:44:55 +00:00
return false ;
2020-07-11 21:04:22 +00:00
auto str = element . getString ( ) ;
2019-08-21 02:28:04 +00:00
ColumnString & col_str = assert_cast < ColumnString & > ( dest ) ;
2020-07-11 21:04:22 +00:00
col_str . insertData ( str . data ( ) , str . size ( ) ) ;
2019-05-13 23:44:55 +00:00
return true ;
}
} ;
2019-05-15 23:56:10 +00:00
/// Nodes of the extract tree. The extract tree is needed to extract complex values containing arrays, tuples or nullables from JSON.
2019-05-13 23:44:55 +00:00
template < typename JSONParser >
2019-05-15 23:56:10 +00:00
struct JSONExtractTree
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
2019-03-14 08:07:25 +00:00
2019-05-13 23:44:55 +00:00
class Node
{
public :
Node ( ) { }
virtual ~ Node ( ) { }
2020-07-11 21:04:22 +00:00
virtual bool insertResultToColumn ( IColumn & , const Element & ) = 0 ;
2019-05-13 23:44:55 +00:00
} ;
2019-03-14 08:07:25 +00:00
2019-05-13 23:44:55 +00:00
template < typename NumberType >
class NumericNode : public Node
{
public :
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
return JSONExtractNumericImpl < JSONParser , NumberType , true > : : insertResultToColumn ( dest , element , { } ) ;
2019-05-13 23:44:55 +00:00
}
} ;
2019-03-14 08:07:25 +00:00
2019-05-13 23:44:55 +00:00
class StringNode : public Node
{
public :
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
return JSONExtractStringImpl < JSONParser > : : insertResultToColumn ( dest , element , { } ) ;
2019-05-13 23:44:55 +00:00
}
} ;
2019-03-14 08:07:25 +00:00
2019-05-13 23:44:55 +00:00
class FixedStringNode : public Node
{
public :
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isString ( ) )
2019-05-13 23:44:55 +00:00
return false ;
2019-08-21 02:28:04 +00:00
auto & col_str = assert_cast < ColumnFixedString & > ( dest ) ;
2020-07-11 21:04:22 +00:00
auto str = element . getString ( ) ;
if ( str . size ( ) > col_str . getN ( ) )
2019-05-13 23:44:55 +00:00
return false ;
2020-07-11 21:04:22 +00:00
col_str . insertData ( str . data ( ) , str . size ( ) ) ;
2019-05-13 23:44:55 +00:00
return true ;
}
} ;
2019-03-14 08:07:25 +00:00
2019-05-13 23:44:55 +00:00
template < typename Type >
class EnumNode : public Node
{
public :
EnumNode ( const std : : vector < std : : pair < String , Type > > & name_value_pairs_ ) : name_value_pairs ( name_value_pairs_ )
2019-03-14 08:07:25 +00:00
{
2019-05-13 23:44:55 +00:00
for ( const auto & name_value_pair : name_value_pairs )
{
name_to_value_map . emplace ( name_value_pair . first , name_value_pair . second ) ;
only_values . emplace ( name_value_pair . second ) ;
}
}
2019-03-14 08:07:25 +00:00
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2019-08-21 02:28:04 +00:00
auto & col_vec = assert_cast < ColumnVector < Type > & > ( dest ) ;
2019-03-14 02:55:04 +00:00
2020-07-11 21:04:22 +00:00
if ( element . isInt64 ( ) )
2019-05-16 18:22:59 +00:00
{
Type value ;
2020-07-11 21:04:22 +00:00
if ( ! accurate : : convertNumeric ( element . getInt64 ( ) , value ) | | ! only_values . count ( value ) )
2019-05-16 18:22:59 +00:00
return false ;
col_vec . insertValue ( value ) ;
return true ;
}
2020-07-11 21:04:22 +00:00
if ( element . isUInt64 ( ) )
2019-03-14 02:55:04 +00:00
{
2019-05-16 18:22:59 +00:00
Type value ;
2020-07-11 21:04:22 +00:00
if ( ! accurate : : convertNumeric ( element . getUInt64 ( ) , value ) | | ! only_values . count ( value ) )
2019-05-13 23:44:55 +00:00
return false ;
col_vec . insertValue ( value ) ;
return true ;
}
2019-03-14 08:07:25 +00:00
2020-07-11 21:04:22 +00:00
if ( element . isString ( ) )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
auto value = name_to_value_map . find ( element . getString ( ) ) ;
2019-05-13 23:44:55 +00:00
if ( value = = name_to_value_map . end ( ) )
return false ;
col_vec . insertValue ( value - > second ) ;
return true ;
2019-03-14 02:55:04 +00:00
}
2019-05-13 23:44:55 +00:00
return false ;
}
private :
std : : vector < std : : pair < String , Type > > name_value_pairs ;
2020-07-11 21:04:22 +00:00
std : : unordered_map < std : : string_view , Type > name_to_value_map ;
2019-05-13 23:44:55 +00:00
std : : unordered_set < Type > only_values ;
} ;
class NullableNode : public Node
{
public :
NullableNode ( std : : unique_ptr < Node > nested_ ) : nested ( std : : move ( nested_ ) ) { }
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2019-08-21 02:28:04 +00:00
ColumnNullable & col_null = assert_cast < ColumnNullable & > ( dest ) ;
2020-07-11 21:04:22 +00:00
if ( ! nested - > insertResultToColumn ( col_null . getNestedColumn ( ) , element ) )
2019-05-13 23:44:55 +00:00
return false ;
col_null . getNullMapColumn ( ) . insertValue ( 0 ) ;
return true ;
}
private :
std : : unique_ptr < Node > nested ;
} ;
class ArrayNode : public Node
{
public :
ArrayNode ( std : : unique_ptr < Node > nested_ ) : nested ( std : : move ( nested_ ) ) { }
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isArray ( ) )
2019-05-13 23:44:55 +00:00
return false ;
2020-07-11 21:04:22 +00:00
auto array = element . getArray ( ) ;
2019-05-13 23:44:55 +00:00
2019-08-21 02:28:04 +00:00
ColumnArray & col_arr = assert_cast < ColumnArray & > ( dest ) ;
2019-05-13 23:44:55 +00:00
auto & data = col_arr . getData ( ) ;
size_t old_size = data . size ( ) ;
bool were_valid_elements = false ;
2020-07-11 21:04:22 +00:00
for ( auto value : array )
2019-03-14 02:55:04 +00:00
{
2020-07-11 21:04:22 +00:00
if ( nested - > insertResultToColumn ( data , value ) )
2019-05-13 23:44:55 +00:00
were_valid_elements = true ;
2019-03-14 02:55:04 +00:00
else
2019-05-13 23:44:55 +00:00
data . insertDefault ( ) ;
2019-03-14 02:55:04 +00:00
}
2019-05-13 23:44:55 +00:00
if ( ! were_valid_elements )
2019-03-14 02:55:04 +00:00
{
2019-05-13 23:44:55 +00:00
data . popBack ( data . size ( ) - old_size ) ;
return false ;
2019-03-14 02:55:04 +00:00
}
2019-05-13 23:44:55 +00:00
col_arr . getOffsets ( ) . push_back ( data . size ( ) ) ;
return true ;
2019-03-14 02:55:04 +00:00
}
2019-05-13 23:44:55 +00:00
private :
std : : unique_ptr < Node > nested ;
} ;
2019-05-07 23:31:35 +00:00
2019-05-13 23:44:55 +00:00
class TupleNode : public Node
{
public :
TupleNode ( std : : vector < std : : unique_ptr < Node > > nested_ , const std : : vector < String > & explicit_names_ ) : nested ( std : : move ( nested_ ) ) , explicit_names ( explicit_names_ )
{
for ( size_t i = 0 ; i ! = explicit_names . size ( ) ; + + i )
name_to_index_map . emplace ( explicit_names [ i ] , i ) ;
}
2019-05-07 23:31:35 +00:00
2020-07-11 21:04:22 +00:00
bool insertResultToColumn ( IColumn & dest , const Element & element ) override
2019-05-13 23:44:55 +00:00
{
2019-08-21 02:28:04 +00:00
ColumnTuple & tuple = assert_cast < ColumnTuple & > ( dest ) ;
2019-05-13 23:44:55 +00:00
size_t old_size = dest . size ( ) ;
bool were_valid_elements = false ;
2019-05-07 23:31:35 +00:00
2019-05-13 23:44:55 +00:00
auto set_size = [ & ] ( size_t size )
{
for ( size_t i = 0 ; i ! = tuple . tupleSize ( ) ; + + i )
{
auto & col = tuple . getColumn ( i ) ;
if ( col . size ( ) ! = size )
{
if ( col . size ( ) > size )
col . popBack ( col . size ( ) - size ) ;
else
while ( col . size ( ) < size )
col . insertDefault ( ) ;
}
}
} ;
2019-05-07 23:31:35 +00:00
2020-07-11 21:04:22 +00:00
if ( element . isArray ( ) )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
auto array = element . getArray ( ) ;
auto it = array . begin ( ) ;
2019-05-07 23:31:35 +00:00
2020-07-11 21:04:22 +00:00
for ( size_t index = 0 ; ( index ! = nested . size ( ) ) & & ( it ! = array . end ( ) ) ; + + index )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( nested [ index ] - > insertResultToColumn ( tuple . getColumn ( index ) , * it + + ) )
2019-05-13 23:44:55 +00:00
were_valid_elements = true ;
else
tuple . getColumn ( index ) . insertDefault ( ) ;
}
set_size ( old_size + static_cast < size_t > ( were_valid_elements ) ) ;
return were_valid_elements ;
}
2020-07-11 21:04:22 +00:00
if ( element . isObject ( ) )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
auto object = element . getObject ( ) ;
2019-05-13 23:44:55 +00:00
if ( name_to_index_map . empty ( ) )
{
2020-07-11 21:04:22 +00:00
auto it = object . begin ( ) ;
for ( size_t index = 0 ; ( index ! = nested . size ( ) ) & & ( it ! = object . end ( ) ) ; + + index )
2019-05-13 23:44:55 +00:00
{
2020-07-11 21:04:22 +00:00
if ( nested [ index ] - > insertResultToColumn ( tuple . getColumn ( index ) , ( * it + + ) . second ) )
2019-05-13 23:44:55 +00:00
were_valid_elements = true ;
else
tuple . getColumn ( index ) . insertDefault ( ) ;
}
}
else
{
2020-07-11 21:04:22 +00:00
for ( auto [ key , value ] : object )
2019-05-13 23:44:55 +00:00
{
auto index = name_to_index_map . find ( key ) ;
if ( index ! = name_to_index_map . end ( ) )
{
2020-07-11 21:04:22 +00:00
if ( nested [ index - > second ] - > insertResultToColumn ( tuple . getColumn ( index - > second ) , value ) )
2019-05-13 23:44:55 +00:00
were_valid_elements = true ;
}
}
}
set_size ( old_size + static_cast < size_t > ( were_valid_elements ) ) ;
return were_valid_elements ;
}
return false ;
}
private :
std : : vector < std : : unique_ptr < Node > > nested ;
std : : vector < String > explicit_names ;
2020-07-11 21:04:22 +00:00
std : : unordered_map < std : : string_view , size_t > name_to_index_map ;
2019-05-13 23:44:55 +00:00
} ;
2019-05-15 23:56:10 +00:00
/// Builds the tree of extraction nodes that corresponds to the data type `type`.
/// Numeric, String, FixedString and Enum types map to a single node; Nullable, Array and
/// Tuple types wrap the node(s) built recursively for their nested type(s).
/// `function_name` is only used in the message of the exception thrown for any other type
/// (error code ILLEGAL_TYPE_OF_ARGUMENT).
static std : : unique_ptr < Node > build ( const char * function_name , const DataTypePtr & type )
2019-05-07 23:31:35 +00:00
{
2019-05-13 23:44:55 +00:00
switch ( type - > getTypeId ( ) )
{
case TypeIndex : : UInt8 : return std : : make_unique < NumericNode < UInt8 > > ( ) ;
case TypeIndex : : UInt16 : return std : : make_unique < NumericNode < UInt16 > > ( ) ;
case TypeIndex : : UInt32 : return std : : make_unique < NumericNode < UInt32 > > ( ) ;
case TypeIndex : : UInt64 : return std : : make_unique < NumericNode < UInt64 > > ( ) ;
case TypeIndex : : Int8 : return std : : make_unique < NumericNode < Int8 > > ( ) ;
case TypeIndex : : Int16 : return std : : make_unique < NumericNode < Int16 > > ( ) ;
case TypeIndex : : Int32 : return std : : make_unique < NumericNode < Int32 > > ( ) ;
case TypeIndex : : Int64 : return std : : make_unique < NumericNode < Int64 > > ( ) ;
case TypeIndex : : Float32 : return std : : make_unique < NumericNode < Float32 > > ( ) ;
case TypeIndex : : Float64 : return std : : make_unique < NumericNode < Float64 > > ( ) ;
case TypeIndex : : String : return std : : make_unique < StringNode > ( ) ;
case TypeIndex : : FixedString : return std : : make_unique < FixedStringNode > ( ) ;
2019-05-15 22:00:05 +00:00
/// Enum nodes are constructed from the (name, value) list of the enum data type.
case TypeIndex : : Enum8 :
return std : : make_unique < EnumNode < Int8 > > ( static_cast < const DataTypeEnum8 & > ( * type ) . getValues ( ) ) ;
case TypeIndex : : Enum16 :
return std : : make_unique < EnumNode < Int16 > > ( static_cast < const DataTypeEnum16 & > ( * type ) . getValues ( ) ) ;
case TypeIndex : : Nullable :
{
2019-05-15 23:56:10 +00:00
return std : : make_unique < NullableNode > ( build ( function_name , static_cast < const DataTypeNullable & > ( * type ) . getNestedType ( ) ) ) ;
2019-05-15 22:00:05 +00:00
}
case TypeIndex : : Array :
{
2019-05-15 23:56:10 +00:00
return std : : make_unique < ArrayNode > ( build ( function_name , static_cast < const DataTypeArray & > ( * type ) . getNestedType ( ) ) ) ;
2019-05-15 22:00:05 +00:00
}
2019-05-13 23:44:55 +00:00
case TypeIndex : : Tuple :
{
const auto & tuple = static_cast < const DataTypeTuple & > ( * type ) ;
const auto & tuple_elements = tuple . getElements ( ) ;
/// One child node per tuple element, in element order.
std : : vector < std : : unique_ptr < Node > > elements ;
for ( const auto & tuple_element : tuple_elements )
2019-05-15 23:56:10 +00:00
elements . emplace_back ( build ( function_name , tuple_element ) ) ;
2019-05-13 23:44:55 +00:00
/// An empty name list is passed when the tuple has no explicit element names.
return std : : make_unique < TupleNode > ( std : : move ( elements ) , tuple . haveExplicitNames ( ) ? tuple . getElementNames ( ) : Strings { } ) ;
}
default :
2019-05-15 22:00:05 +00:00
throw Exception { " Function " + String ( function_name ) + " doesn't support the return type schema: " + type - > getName ( ) , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
2019-05-13 23:44:55 +00:00
}
2019-05-07 23:31:35 +00:00
}
2019-05-15 23:56:10 +00:00
} ;
2020-04-20 10:08:22 +00:00
2019-05-15 23:56:10 +00:00
/// Implementation that extracts a JSON element into a column of the data type named by
/// the last (constant string) argument. The actual conversion is delegated to the node
/// tree created by JSONExtractTree<JSONParser>::build in prepare().
template < typename JSONParser >
class JSONExtractImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
2019-05-15 23:56:10 +00:00
2020-07-11 21:04:22 +00:00
/// Returns the data type whose name is given by the last argument.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if there are fewer than two arguments and
/// ILLEGAL_COLUMN if the last argument is not a constant column of a string type.
static DataTypePtr getReturnType ( const char * function_name , const ColumnsWithTypeAndName & arguments )
2019-05-15 23:56:10 +00:00
{
if ( arguments . size ( ) < 2 )
throw Exception { " Function " + String ( function_name ) + " requires at least two arguments " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
const auto & col = arguments . back ( ) ;
auto col_type_const = typeid_cast < const ColumnConst * > ( col . column . get ( ) ) ;
if ( ! col_type_const | | ! isString ( col . type ) )
throw Exception { " The last argument of function " + String ( function_name )
2020-07-10 14:04:20 +00:00
+ " should be a constant string specifying the return data type, illegal value: " + col . name ,
2019-05-15 23:56:10 +00:00
ErrorCodes : : ILLEGAL_COLUMN } ;
return DataTypeFactory : : instance ( ) . get ( col_type_const - > getValue < String > ( ) ) ;
}
2020-07-11 21:04:22 +00:00
/// Number of arguments minus two — presumably the JSON argument and the trailing type
/// argument are the two that are not path indexes (confirm against the caller).
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 2 ; }
2019-05-15 23:56:10 +00:00
/// Builds the extraction tree for the type of the result column at `result_pos`.
void prepare ( const char * function_name , const Block & block , const ColumnNumbers & , size_t result_pos )
{
extract_tree = JSONExtractTree < JSONParser > : : build ( function_name , block . getByPosition ( result_pos ) . type ) ;
}
2020-07-11 21:04:22 +00:00
/// Forwards to the extraction tree; the string_view argument is ignored.
/// `extract_tree` is dereferenced unconditionally, so prepare() has to be called first.
bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
return extract_tree - > insertResultToColumn ( dest , element ) ;
2019-05-15 23:56:10 +00:00
}
protected :
/// Root of the node tree created by prepare().
std : : unique_ptr < typename JSONExtractTree < JSONParser > : : Node > extract_tree ;
} ;
/// Implementation that turns a JSON object into an array of (key, value) tuples, where
/// keys are strings and values are converted to the data type named by the last
/// (constant string) argument.
template < typename JSONParser >
class JSONExtractKeysAndValuesImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
2019-05-15 23:56:10 +00:00
2020-07-11 21:04:22 +00:00
/// Returns an Array of Tuple(String, T), where T is the data type whose name is given
/// by the last argument.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if there are fewer than two arguments and
/// ILLEGAL_COLUMN if the last argument is not a constant column of a string type.
static DataTypePtr getReturnType ( const char * function_name , const ColumnsWithTypeAndName & arguments )
2019-05-15 23:56:10 +00:00
{
if ( arguments . size ( ) < 2 )
throw Exception { " Function " + String ( function_name ) + " requires at least two arguments " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
const auto & col = arguments . back ( ) ;
auto col_type_const = typeid_cast < const ColumnConst * > ( col . column . get ( ) ) ;
if ( ! col_type_const | | ! isString ( col . type ) )
throw Exception { " The last argument of function " + String ( function_name )
2020-07-10 14:04:20 +00:00
+ " should be a constant string specifying the values' data type, illegal value: " + col . name ,
2019-05-15 23:56:10 +00:00
ErrorCodes : : ILLEGAL_COLUMN } ;
DataTypePtr key_type = std : : make_unique < DataTypeString > ( ) ;
2020-04-20 10:08:22 +00:00
DataTypePtr value_type = DataTypeFactory : : instance ( ) . get ( col_type_const - > getValue < String > ( ) ) ;
2019-05-15 23:56:10 +00:00
DataTypePtr tuple_type = std : : make_unique < DataTypeTuple > ( DataTypes { key_type , value_type } ) ;
return std : : make_unique < DataTypeArray > ( tuple_type ) ;
}
2020-07-11 21:04:22 +00:00
/// Number of arguments minus two — presumably the JSON argument and the trailing type
/// argument are the two that are not path indexes (confirm against the caller).
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 2 ; }
2019-05-15 23:56:10 +00:00
/// Builds the extraction tree for the value type, i.e. the second element of the tuple
/// nested in the array type of the result column at `result_pos`.
/// The typeid_cast results are dereferenced without a null check, so the result type
/// is expected to have the Array(Tuple(...)) shape produced by getReturnType().
void prepare ( const char * function_name , const Block & block , const ColumnNumbers & , size_t result_pos )
{
const auto & result_type = block . getByPosition ( result_pos ) . type ;
const auto tuple_type = typeid_cast < const DataTypeArray * > ( result_type . get ( ) ) - > getNestedType ( ) ;
const auto value_type = typeid_cast < const DataTypeTuple * > ( tuple_type . get ( ) ) - > getElements ( ) [ 1 ] ;
extract_tree = JSONExtractTree < JSONParser > : : build ( function_name , value_type ) ;
}
2020-07-11 21:04:22 +00:00
/// Appends one array row with the (key, value) pairs of the JSON object `element`.
/// Returns false if `element` is not an object or if the tuple column did not grow.
/// The string_view argument is ignored.
bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isObject ( ) )
2019-05-15 23:56:10 +00:00
return false ;
2020-07-11 21:04:22 +00:00
auto object = element . getObject ( ) ;
2019-08-21 02:28:04 +00:00
auto & col_arr = assert_cast < ColumnArray & > ( dest ) ;
auto & col_tuple = assert_cast < ColumnTuple & > ( col_arr . getData ( ) ) ;
2019-05-15 23:56:10 +00:00
size_t old_size = col_tuple . size ( ) ;
2019-08-21 02:28:04 +00:00
auto & col_key = assert_cast < ColumnString & > ( col_tuple . getColumn ( 0 ) ) ;
2019-05-15 23:56:10 +00:00
auto & col_value = col_tuple . getColumn ( 1 ) ;
2020-07-11 21:04:22 +00:00
for ( auto [ key , value ] : object )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
/// The key is stored only when its value was extracted, which keeps the key and
/// value columns the same length.
if ( extract_tree - > insertResultToColumn ( col_value , value ) )
col_key . insertData ( key . data ( ) , key . size ( ) ) ;
2019-05-15 23:56:10 +00:00
}
if ( col_tuple . size ( ) = = old_size )
return false ;
col_arr . getOffsets ( ) . push_back ( col_tuple . size ( ) ) ;
return true ;
}
private :
/// Root of the node tree for the value type, created by prepare().
std : : unique_ptr < typename JSONExtractTree < JSONParser > : : Node > extract_tree ;
} ;
/// Implementation that writes a JSON element back out as JSON text and stores that text
/// as one row of a String column.
template < typename JSONParser >
class JSONExtractRawImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
/// The result type is always String; both arguments are ignored.
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-05-15 23:56:10 +00:00
{
return std : : make_shared < DataTypeString > ( ) ;
}
2020-07-11 21:04:22 +00:00
/// Number of arguments minus one — presumably only the JSON argument is not a path
/// index here (confirm against the caller).
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
/// Serializes `element` directly into the character buffer of the string column,
/// then appends a zero byte and the new end offset. Always returns true.
/// The string_view argument is ignored.
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-05-15 23:56:10 +00:00
{
2019-08-21 02:28:04 +00:00
ColumnString & col_str = assert_cast < ColumnString & > ( dest ) ;
2019-05-15 23:56:10 +00:00
auto & chars = col_str . getChars ( ) ;
/// The buffer is opened in append mode on the column's own character storage.
WriteBufferFromVector < ColumnString : : Chars > buf ( chars , WriteBufferFromVector < ColumnString : : Chars > : : AppendModeTag ( ) ) ;
2020-07-11 21:04:22 +00:00
traverse ( element , buf ) ;
2020-01-10 21:42:26 +00:00
/// The buffer is finalized before `chars` is accessed directly again.
buf . finalize ( ) ;
2019-05-15 23:56:10 +00:00
chars . push_back ( 0 ) ;
col_str . getOffsets ( ) . push_back ( chars . size ( ) ) ;
return true ;
}
private :
2020-07-11 21:04:22 +00:00
/// Recursively writes `element` to `buf` as JSON text. The kind checks are tried in the
/// order Int64, UInt64, Double, Bool, String, Array, Object, Null; the first one that
/// matches is written. If none matches, nothing is written.
static void traverse ( const Element & element , WriteBuffer & buf )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
if ( element . isInt64 ( ) )
2019-05-16 18:22:59 +00:00
{
2020-07-11 21:04:22 +00:00
writeIntText ( element . getInt64 ( ) , buf ) ;
2019-05-16 18:22:59 +00:00
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isUInt64 ( ) )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
writeIntText ( element . getUInt64 ( ) , buf ) ;
2019-05-15 23:56:10 +00:00
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isDouble ( ) )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
writeFloatText ( element . getDouble ( ) , buf ) ;
2019-05-15 23:56:10 +00:00
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isBool ( ) )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
if ( element . getBool ( ) )
2019-05-15 23:56:10 +00:00
writeCString ( " true " , buf ) ;
else
writeCString ( " false " , buf ) ;
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isString ( ) )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
writeJSONString ( element . getString ( ) , buf , format_settings ( ) ) ;
2019-05-15 23:56:10 +00:00
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isArray ( ) )
2019-05-15 23:56:10 +00:00
{
writeChar ( ' [ ' , buf ) ;
2020-07-11 21:04:22 +00:00
bool need_comma = false ;
for ( auto value : element . getArray ( ) )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
/// std::exchange yields the previous flag value, so no comma precedes the first item.
if ( std : : exchange ( need_comma , true ) )
2019-05-15 23:56:10 +00:00
writeChar ( ' , ' , buf ) ;
2020-07-11 21:04:22 +00:00
traverse ( value , buf ) ;
2019-05-15 23:56:10 +00:00
}
writeChar ( ' ] ' , buf ) ;
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isObject ( ) )
2019-05-15 23:56:10 +00:00
{
writeChar ( ' { ' , buf ) ;
2020-07-11 21:04:22 +00:00
bool need_comma = false ;
for ( auto [ key , value ] : element . getObject ( ) )
2019-05-15 23:56:10 +00:00
{
2020-07-11 21:04:22 +00:00
/// Same separator handling as for arrays.
if ( std : : exchange ( need_comma , true ) )
writeChar ( ' , ' , buf ) ;
2019-05-15 23:56:10 +00:00
writeJSONString ( key , buf , format_settings ( ) ) ;
writeChar ( ' : ' , buf ) ;
2020-07-11 21:04:22 +00:00
traverse ( value , buf ) ;
2019-05-15 23:56:10 +00:00
}
writeChar ( ' } ' , buf ) ;
return ;
}
2020-07-11 21:04:22 +00:00
if ( element . isNull ( ) )
2019-05-15 23:56:10 +00:00
{
writeCString ( " null " , buf ) ;
return ;
}
}
2019-05-13 23:44:55 +00:00
2019-05-15 23:56:10 +00:00
/// Returns the format settings used for writing JSON strings: a default-constructed
/// FormatSettings with json.escape_forward_slashes set to false. The object is a
/// function-local static, created on the first call.
static const FormatSettings & format_settings ( )
{
static const FormatSettings the_instance = [ ]
{
FormatSettings settings ;
settings . json . escape_forward_slashes = false ;
return settings ;
} ( ) ;
return the_instance ;
}
2019-05-07 23:31:35 +00:00
} ;
2020-04-20 10:08:22 +00:00
2019-12-08 00:18:24 +00:00
template < typename JSONParser >
class JSONExtractArrayRawImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2019-12-08 00:18:24 +00:00
{
return std : : make_shared < DataTypeArray > ( std : : make_shared < DataTypeString > ( ) ) ;
}
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
static bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2019-12-08 00:18:24 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isArray ( ) )
2019-12-08 00:18:24 +00:00
return false ;
2020-04-20 10:08:22 +00:00
2020-07-11 21:04:22 +00:00
auto array = element . getArray ( ) ;
2019-12-08 00:18:24 +00:00
ColumnArray & col_res = assert_cast < ColumnArray & > ( dest ) ;
2020-07-11 21:04:22 +00:00
for ( auto value : array )
JSONExtractRawImpl < JSONParser > : : insertResultToColumn ( col_res . getData ( ) , value , { } ) ;
col_res . getOffsets ( ) . push_back ( col_res . getOffsets ( ) . back ( ) + array . size ( ) ) ;
2019-12-08 00:18:24 +00:00
return true ;
}
} ;
2020-04-20 10:08:22 +00:00
template < typename JSONParser >
class JSONExtractKeysAndValuesRawImpl
{
public :
2020-07-11 21:04:22 +00:00
using Element = typename JSONParser : : Element ;
2020-04-20 10:08:22 +00:00
2020-07-11 21:04:22 +00:00
static DataTypePtr getReturnType ( const char * , const ColumnsWithTypeAndName & )
2020-04-20 10:08:22 +00:00
{
2020-04-20 20:28:54 +00:00
DataTypePtr string_type = std : : make_unique < DataTypeString > ( ) ;
DataTypePtr tuple_type = std : : make_unique < DataTypeTuple > ( DataTypes { string_type , string_type } ) ;
2020-04-20 10:08:22 +00:00
return std : : make_unique < DataTypeArray > ( tuple_type ) ;
}
2020-07-11 21:04:22 +00:00
static size_t getNumberOfIndexArguments ( const Block & , const ColumnNumbers & arguments ) { return arguments . size ( ) - 1 ; }
bool insertResultToColumn ( IColumn & dest , const Element & element , const std : : string_view & )
2020-04-20 10:08:22 +00:00
{
2020-07-11 21:04:22 +00:00
if ( ! element . isObject ( ) )
2020-04-20 10:08:22 +00:00
return false ;
2020-07-11 21:04:22 +00:00
auto object = element . getObject ( ) ;
2020-04-20 10:08:22 +00:00
auto & col_arr = assert_cast < ColumnArray & > ( dest ) ;
auto & col_tuple = assert_cast < ColumnTuple & > ( col_arr . getData ( ) ) ;
auto & col_key = assert_cast < ColumnString & > ( col_tuple . getColumn ( 0 ) ) ;
auto & col_value = assert_cast < ColumnString & > ( col_tuple . getColumn ( 1 ) ) ;
2020-07-11 21:04:22 +00:00
for ( auto [ key , value ] : object )
2020-04-20 10:08:22 +00:00
{
2020-07-11 21:04:22 +00:00
col_key . insertData ( key . data ( ) , key . size ( ) ) ;
JSONExtractRawImpl < JSONParser > : : insertResultToColumn ( col_value , value , { } ) ;
2020-04-20 10:08:22 +00:00
}
2020-07-11 21:04:22 +00:00
col_arr . getOffsets ( ) . push_back ( col_arr . getOffsets ( ) . back ( ) + object . size ( ) ) ;
2020-04-20 10:08:22 +00:00
return true ;
}
} ;
2019-05-07 23:31:35 +00:00
}