2018-09-09 20:57:54 +00:00
# include <Functions/IFunction.h>
# include <Functions/FunctionFactory.h>
# include <Functions/FunctionHelpers.h>
# include <DataTypes/DataTypeArray.h>
# include <DataTypes/DataTypesNumber.h>
# include <Columns/ColumnArray.h>
# include <Columns/ColumnConst.h>
# include <Columns/ColumnNullable.h>
2018-09-09 21:15:40 +00:00
# include <Columns/ColumnString.h>
2018-09-09 20:57:54 +00:00
# include <Common/HashTable/ClearableHashSet.h>
2019-02-10 17:40:52 +00:00
# include <Common/ColumnsHashing.h>
2018-09-09 20:57:54 +00:00
# include <Interpreters/AggregationCommon.h>
# include <IO/WriteHelpers.h>
namespace DB
{
/// Error codes used by the exceptions thrown in this file.
/// They are only declared here; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
}
/// Counts the number of different elements in the array, or the number of different tuples from the elements at the corresponding positions in several arrays.
/// NOTE The implementation partially matches arrayEnumerateUniq.
class FunctionArrayUniq : public IFunction
{
public :
static constexpr auto name = " arrayUniq " ;
static FunctionPtr create ( const Context & ) { return std : : make_shared < FunctionArrayUniq > ( ) ; }
String getName ( ) const override { return name ; }
bool isVariadic ( ) const override { return true ; }
size_t getNumberOfArguments ( ) const override { return 0 ; }
bool useDefaultImplementationForConstants ( ) const override { return true ; }
DataTypePtr getReturnTypeImpl ( const DataTypes & arguments ) const override
{
if ( arguments . size ( ) = = 0 )
throw Exception ( " Number of arguments for function " + getName ( ) + " doesn't match: passed "
+ toString ( arguments . size ( ) ) + " , should be at least 1. " ,
ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH ) ;
for ( size_t i = 0 ; i < arguments . size ( ) ; + + i )
{
const DataTypeArray * array_type = checkAndGetDataType < DataTypeArray > ( arguments [ i ] . get ( ) ) ;
if ( ! array_type )
throw Exception ( " All arguments for function " + getName ( ) + " must be arrays but argument " +
toString ( i + 1 ) + " has type " + arguments [ i ] - > getName ( ) + " . " , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT ) ;
}
return std : : make_shared < DataTypeUInt32 > ( ) ;
}
void executeImpl ( Block & block , const ColumnNumbers & arguments , size_t result , size_t input_rows_count ) override ;
private :
/// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
static constexpr size_t INITIAL_SIZE_DEGREE = 9 ;
2019-02-04 19:01:26 +00:00
template < typename T >
struct MethodOneNumber
{
using Set = ClearableHashSet < T , DefaultHash < T > , HashTableGrower < INITIAL_SIZE_DEGREE > ,
HashTableAllocatorWithStackMemory < ( 1ULL < < INITIAL_SIZE_DEGREE ) * sizeof ( T ) > > ;
using Method = ColumnsHashing : : HashMethodOneNumber < typename Set : : value_type , void , T , false > ;
} ;
struct MethodString
{
using Set = ClearableHashSet < StringRef , StringRefHash , HashTableGrower < INITIAL_SIZE_DEGREE > ,
HashTableAllocatorWithStackMemory < ( 1ULL < < INITIAL_SIZE_DEGREE ) * sizeof ( StringRef ) > > ;
2019-02-05 11:14:09 +00:00
using Method = ColumnsHashing : : HashMethodString < typename Set : : value_type , void , false , false > ;
2019-02-04 19:01:26 +00:00
} ;
struct MethodFixedString
{
using Set = ClearableHashSet < StringRef , StringRefHash , HashTableGrower < INITIAL_SIZE_DEGREE > ,
HashTableAllocatorWithStackMemory < ( 1ULL < < INITIAL_SIZE_DEGREE ) * sizeof ( StringRef ) > > ;
2019-02-05 11:14:09 +00:00
using Method = ColumnsHashing : : HashMethodFixedString < typename Set : : value_type , void , false , false > ;
2019-02-04 19:01:26 +00:00
} ;
struct MethodFixed
{
using Set = ClearableHashSet < UInt128 , UInt128HashCRC32 , HashTableGrower < INITIAL_SIZE_DEGREE > ,
HashTableAllocatorWithStackMemory < ( 1ULL < < INITIAL_SIZE_DEGREE ) * sizeof ( UInt128 ) > > ;
using Method = ColumnsHashing : : HashMethodKeysFixed < typename Set : : value_type , UInt128 , void , false , false , false > ;
} ;
struct MethodHashed
{
using Set = ClearableHashSet < UInt128 , UInt128TrivialHash , HashTableGrower < INITIAL_SIZE_DEGREE > ,
HashTableAllocatorWithStackMemory < ( 1ULL < < INITIAL_SIZE_DEGREE ) * sizeof ( UInt128 ) > > ;
using Method = ColumnsHashing : : HashMethodHashed < typename Set : : value_type , void , false > ;
} ;
template < typename Method >
void executeMethod ( const ColumnArray : : Offsets & offsets , const ColumnRawPtrs & columns , const Sizes & key_sizes ,
const NullMap * null_map , ColumnUInt32 : : Container & res_values ) ;
template < typename Method , bool has_null_map >
void executeMethodImpl ( const ColumnArray : : Offsets & offsets , const ColumnRawPtrs & columns , const Sizes & key_sizes ,
const NullMap * null_map , ColumnUInt32 : : Container & res_values ) ;
2018-09-09 20:57:54 +00:00
template < typename T >
2018-12-23 01:41:03 +00:00
bool executeNumber ( const ColumnArray : : Offsets & offsets , const IColumn & data , const NullMap * null_map , ColumnUInt32 : : Container & res_values ) ;
bool executeString ( const ColumnArray : : Offsets & offsets , const IColumn & data , const NullMap * null_map , ColumnUInt32 : : Container & res_values ) ;
2019-02-04 19:01:26 +00:00
bool executeFixedString ( const ColumnArray : : Offsets & offsets , const IColumn & data , const NullMap * null_map , ColumnUInt32 : : Container & res_values ) ;
2018-12-23 01:41:03 +00:00
bool execute128bit ( const ColumnArray : : Offsets & offsets , const ColumnRawPtrs & columns , ColumnUInt32 : : Container & res_values ) ;
2019-02-04 19:01:26 +00:00
void executeHashed ( const ColumnArray : : Offsets & offsets , const ColumnRawPtrs & columns , ColumnUInt32 : : Container & res_values ) ;
2018-09-09 20:57:54 +00:00
} ;
void FunctionArrayUniq : : executeImpl ( Block & block , const ColumnNumbers & arguments , size_t result , size_t /*input_rows_count*/ )
{
const ColumnArray : : Offsets * offsets = nullptr ;
2018-12-23 01:41:03 +00:00
size_t num_arguments = arguments . size ( ) ;
ColumnRawPtrs data_columns ( num_arguments ) ;
2018-09-09 20:57:54 +00:00
2018-12-23 01:41:03 +00:00
Columns array_holders ;
for ( size_t i = 0 ; i < num_arguments ; + + i )
2018-09-09 20:57:54 +00:00
{
2018-12-23 01:41:03 +00:00
const ColumnPtr & array_ptr = block . getByPosition ( arguments [ i ] ) . column ;
2018-09-09 20:57:54 +00:00
const ColumnArray * array = checkAndGetColumn < ColumnArray > ( array_ptr . get ( ) ) ;
if ( ! array )
{
const ColumnConst * const_array = checkAndGetColumnConst < ColumnArray > (
block . getByPosition ( arguments [ i ] ) . column . get ( ) ) ;
if ( ! const_array )
throw Exception ( " Illegal column " + block . getByPosition ( arguments [ i ] ) . column - > getName ( )
2018-12-23 01:41:03 +00:00
+ " of " + toString ( i + 1 ) + " -th argument of function " + getName ( ) ,
2018-09-09 20:57:54 +00:00
ErrorCodes : : ILLEGAL_COLUMN ) ;
2018-12-23 01:41:03 +00:00
array_holders . emplace_back ( const_array - > convertToFullColumn ( ) ) ;
array = checkAndGetColumn < ColumnArray > ( array_holders . back ( ) . get ( ) ) ;
2018-09-09 20:57:54 +00:00
}
const ColumnArray : : Offsets & offsets_i = array - > getOffsets ( ) ;
if ( i = = 0 )
offsets = & offsets_i ;
else if ( offsets_i ! = * offsets )
throw Exception ( " Lengths of all arrays passed to " + getName ( ) + " must be equal. " ,
ErrorCodes : : SIZES_OF_ARRAYS_DOESNT_MATCH ) ;
2018-12-23 01:41:03 +00:00
auto * array_data = & array - > getData ( ) ;
data_columns [ i ] = array_data ;
}
const NullMap * null_map = nullptr ;
2018-09-09 20:57:54 +00:00
2018-12-23 01:41:03 +00:00
for ( size_t i = 0 ; i < num_arguments ; + + i )
{
2019-06-27 18:50:20 +00:00
if ( auto * nullable_col = checkAndGetColumn < ColumnNullable > ( * data_columns [ i ] ) )
2018-09-09 20:57:54 +00:00
{
2018-12-23 01:41:03 +00:00
if ( num_arguments = = 1 )
2019-06-26 17:20:33 +00:00
data_columns [ i ] = & nullable_col - > getNestedColumn ( ) ;
2018-12-23 01:41:03 +00:00
2019-06-26 17:20:33 +00:00
null_map = & nullable_col - > getNullMapData ( ) ;
2018-12-23 01:41:03 +00:00
break ;
2018-09-09 20:57:54 +00:00
}
}
auto res = ColumnUInt32 : : create ( ) ;
ColumnUInt32 : : Container & res_values = res - > getData ( ) ;
res_values . resize ( offsets - > size ( ) ) ;
2018-12-23 01:41:03 +00:00
if ( num_arguments = = 1 )
2018-09-09 20:57:54 +00:00
{
2019-02-04 19:01:26 +00:00
if ( ! ( executeNumber < UInt8 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
2018-12-23 01:41:03 +00:00
| | executeNumber < UInt16 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < UInt32 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < UInt64 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < Int8 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < Int16 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < Int32 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < Int64 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < Float32 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeNumber < Float64 > ( * offsets , * data_columns [ 0 ] , null_map , res_values )
2019-02-04 19:01:26 +00:00
| | executeFixedString ( * offsets , * data_columns [ 0 ] , null_map , res_values )
| | executeString ( * offsets , * data_columns [ 0 ] , null_map , res_values ) ) )
executeHashed ( * offsets , data_columns , res_values ) ;
2018-09-09 20:57:54 +00:00
}
else
{
2019-02-04 19:01:26 +00:00
if ( ! execute128bit ( * offsets , data_columns , res_values ) )
executeHashed ( * offsets , data_columns , res_values ) ;
2018-09-09 20:57:54 +00:00
}
block . getByPosition ( result ) . column = std : : move ( res ) ;
}
2019-02-04 19:01:26 +00:00
template < typename Method , bool has_null_map >
void FunctionArrayUniq : : executeMethodImpl (
const ColumnArray : : Offsets & offsets ,
const ColumnRawPtrs & columns ,
const Sizes & key_sizes ,
[[maybe_unused]] const NullMap * null_map ,
ColumnUInt32 : : Container & res_values )
2018-09-09 20:57:54 +00:00
{
2019-02-04 19:01:26 +00:00
typename Method : : Set set ;
typename Method : : Method method ( columns , key_sizes , nullptr ) ;
Arena pool ; /// Won't use it;
2018-09-09 20:57:54 +00:00
ColumnArray : : Offset prev_off = 0 ;
for ( size_t i = 0 ; i < offsets . size ( ) ; + + i )
{
set . clear ( ) ;
bool found_null = false ;
ColumnArray : : Offset off = offsets [ i ] ;
for ( ColumnArray : : Offset j = prev_off ; j < off ; + + j )
{
2019-02-04 19:01:26 +00:00
if constexpr ( has_null_map )
{
if ( ( * null_map ) [ j ] )
{
found_null = true ;
continue ;
}
}
method . emplaceKey ( set , j , pool ) ;
2018-09-09 20:57:54 +00:00
}
res_values [ i ] = set . size ( ) + found_null ;
prev_off = off ;
}
}
2019-02-04 19:01:26 +00:00
template < typename Method >
void FunctionArrayUniq : : executeMethod (
const ColumnArray : : Offsets & offsets ,
const ColumnRawPtrs & columns ,
const Sizes & key_sizes ,
const NullMap * null_map ,
ColumnUInt32 : : Container & res_values )
{
if ( null_map )
executeMethodImpl < Method , true > ( offsets , columns , key_sizes , null_map , res_values ) ;
else
executeMethodImpl < Method , false > ( offsets , columns , key_sizes , null_map , res_values ) ;
}
template < typename T >
bool FunctionArrayUniq : : executeNumber ( const ColumnArray : : Offsets & offsets , const IColumn & data , const NullMap * null_map , ColumnUInt32 : : Container & res_values )
2018-09-09 20:57:54 +00:00
{
2019-02-04 19:01:26 +00:00
const auto * nested = checkAndGetColumn < ColumnVector < T > > ( & data ) ;
2018-09-09 20:57:54 +00:00
if ( ! nested )
return false ;
2019-02-04 19:01:26 +00:00
executeMethod < MethodOneNumber < T > > ( offsets , { nested } , { } , null_map , res_values ) ;
return true ;
}
2018-09-09 20:57:54 +00:00
2019-02-04 19:01:26 +00:00
bool FunctionArrayUniq : : executeString ( const ColumnArray : : Offsets & offsets , const IColumn & data , const NullMap * null_map , ColumnUInt32 : : Container & res_values )
{
const auto * nested = checkAndGetColumn < ColumnString > ( & data ) ;
if ( nested )
executeMethod < MethodString > ( offsets , { nested } , { } , null_map , res_values ) ;
2018-09-09 20:57:54 +00:00
2019-02-04 19:01:26 +00:00
return nested ;
2018-09-09 20:57:54 +00:00
}
2019-02-04 19:01:26 +00:00
bool FunctionArrayUniq : : executeFixedString ( const ColumnArray : : Offsets & offsets , const IColumn & data , const NullMap * null_map , ColumnUInt32 : : Container & res_values )
{
const auto * nested = checkAndGetColumn < ColumnFixedString > ( & data ) ;
if ( nested )
executeMethod < MethodFixedString > ( offsets , { nested } , { } , null_map , res_values ) ;
return nested ;
}
2018-09-09 20:57:54 +00:00
bool FunctionArrayUniq : : execute128bit (
2019-02-04 19:01:26 +00:00
const ColumnArray : : Offsets & offsets ,
const ColumnRawPtrs & columns ,
ColumnUInt32 : : Container & res_values )
2018-09-09 20:57:54 +00:00
{
size_t count = columns . size ( ) ;
size_t keys_bytes = 0 ;
Sizes key_sizes ( count ) ;
for ( size_t j = 0 ; j < count ; + + j )
{
if ( ! columns [ j ] - > isFixedAndContiguous ( ) )
return false ;
key_sizes [ j ] = columns [ j ] - > sizeOfValueIfFixed ( ) ;
keys_bytes + = key_sizes [ j ] ;
}
if ( keys_bytes > 16 )
return false ;
2019-02-04 19:01:26 +00:00
executeMethod < MethodFixed > ( offsets , columns , key_sizes , nullptr , res_values ) ;
2018-09-09 20:57:54 +00:00
return true ;
}
2019-02-04 19:01:26 +00:00
void FunctionArrayUniq : : executeHashed (
const ColumnArray : : Offsets & offsets ,
const ColumnRawPtrs & columns ,
ColumnUInt32 : : Container & res_values )
2018-09-09 20:57:54 +00:00
{
2019-02-04 19:01:26 +00:00
executeMethod < MethodHashed > ( offsets , columns , { } , nullptr , res_values ) ;
2018-09-09 20:57:54 +00:00
}
/// Registers FunctionArrayUniq in the given function factory.
void registerFunctionArrayUniq(FunctionFactory & factory)
{
    factory.registerFunction<FunctionArrayUniq>();
}
}