2012-08-23 20:22:44 +00:00
#include <vector>

#include <DB/Core/Field.h>
#include <DB/Core/FieldVisitors.h>

#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnsNumber.h>

#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/DataStreams/OneBlockInputStream.h>

#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTLiteral.h>

#include <DB/Interpreters/Set.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Interpreters/ExpressionActions.h>

#include <DB/DataTypes/DataTypeArray.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypeFixedString.h>
#include <DB/DataTypes/DataTypeDate.h>
#include <DB/DataTypes/DataTypeDateTime.h>
2012-08-23 20:22:44 +00:00
namespace DB
{
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
void SetVariants : : init ( Type type_ )
2013-06-20 12:12:27 +00:00
{
2015-03-02 01:11:37 +00:00
type = type_ ;
switch ( type )
{
case Type : : EMPTY : break ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : NAME . reset ( new decltype ( NAME ) : : element_type ) ; break ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_AGGREGATED_DATA_VARIANT ) ;
}
}
size_t SetVariants : : getTotalRowCount ( ) const
{
switch ( type )
{
case Type : : EMPTY : return 0 ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : return NAME - > data . size ( ) ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_AGGREGATED_DATA_VARIANT ) ;
}
2013-06-20 12:12:27 +00:00
}
2015-03-02 01:11:37 +00:00
size_t SetVariants : : getTotalByteCount ( ) const
2013-06-20 12:12:27 +00:00
{
2015-03-02 01:11:37 +00:00
switch ( type )
{
case Type : : EMPTY : return 0 ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : return NAME - > data . getBufferSizeInBytes ( ) ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_AGGREGATED_DATA_VARIANT ) ;
}
}
2013-06-20 12:12:27 +00:00
bool Set : : checkSetSizeLimits ( ) const
{
2015-03-02 01:11:37 +00:00
if ( max_rows & & data . getTotalRowCount ( ) > max_rows )
2013-06-20 12:12:27 +00:00
return false ;
2015-03-02 01:11:37 +00:00
if ( max_bytes & & data . getTotalByteCount ( ) > max_bytes )
2013-06-20 12:12:27 +00:00
return false ;
return true ;
}
2014-06-26 00:58:14 +00:00
2012-08-23 20:22:44 +00:00
2015-03-02 01:11:37 +00:00
/** Pick the set variant that fits the given key columns.
  *
  * key_sizes is resized to the number of keys and filled with the field sizes of the
  * leading fixed-size columns (filling stops at the first column that is not fixed).
  *
  * Throws LOGICAL_ERROR if a single numeric key reports a field size other than 1, 2, 4 or 8.
  */
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
{
    const size_t keys_size = key_columns.size();

    key_sizes.resize(keys_size);

    /// Sum up the widths of the keys for as long as they are fixed-size.
    bool all_fixed = true;
    size_t keys_bytes = 0;
    for (size_t j = 0; all_fixed && j < keys_size; ++j)
    {
        if (key_columns[j]->isFixed())
        {
            key_sizes[j] = key_columns[j]->sizeOfField();
            keys_bytes += key_sizes[j];
        }
        else
            all_fixed = false;
    }

    /// A single numeric key that fits in 64 bits.
    if (keys_size == 1 && key_columns[0]->isNumeric())
    {
        switch (key_columns[0]->sizeOfField())
        {
            case 1: return Type::key8;
            case 2: return Type::key16;
            case 4: return Type::key32;
            case 8: return Type::key64;
            default:
                throw Exception(" Logical error: numeric column has sizeOfField not in 1, 2, 4, 8. ", ErrorCodes::LOGICAL_ERROR);
        }
    }

    /// If the keys fit in N bits, use a hash table over the keys packed into N bits.
    if (all_fixed)
    {
        if (keys_bytes <= 16)
            return Type::keys128;
        if (keys_bytes <= 32)
            return Type::keys256;
    }

    /// A single string key gets a hash table keyed by that string.
    if (keys_size == 1)
    {
        const auto * only_key = key_columns[0];

        if (typeid_cast<const ColumnString *>(only_key) || typeid_cast<const ColumnConstString *>(only_key))
            return Type::key_string;

        if (typeid_cast<const ColumnFixedString *>(only_key))
            return Type::key_fixed_string;
    }

    /// Otherwise fall back to the generic `hashed` variant.
    return Type::hashed;
}
template < typename Method >
2015-03-02 05:41:21 +00:00
void NO_INLINE Set : : insertFromBlockImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
size_t rows ,
SetVariants & variants )
{
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
/// Для всех строчек
2015-03-02 05:41:21 +00:00
for ( size_t i = 0 ; i < rows ; + + i )
2015-03-02 01:11:37 +00:00
{
/// Строим ключ
2015-03-03 20:00:39 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , i , key_sizes ) ;
2015-03-02 01:11:37 +00:00
typename Method : : Data : : iterator it = method . data . find ( key ) ;
bool inserted ;
method . data . emplace ( key , it , inserted ) ;
if ( inserted )
2015-03-03 20:00:39 +00:00
method . onNewKey ( * it , keys_size , i , variants . string_pool ) ;
2015-03-02 01:11:37 +00:00
}
2012-08-23 20:22:44 +00:00
}
2015-01-27 00:52:03 +00:00
bool Set : : insertFromBlock ( const Block & block , bool create_ordered_set )
2012-08-23 20:22:44 +00:00
{
2015-01-27 00:52:03 +00:00
Poco : : ScopedWriteRWLock lock ( rwlock ) ;
2014-03-04 11:26:55 +00:00
size_t keys_size = block . columns ( ) ;
ConstColumnPlainPtrs key_columns ( keys_size ) ;
data_types . resize ( keys_size ) ;
2015-03-15 07:56:46 +00:00
/// Константные столбцы справа от IN поддерживается не напрямую. Для этого, они сначала материализуется.
Columns materialized_columns ;
2014-03-04 11:26:55 +00:00
/// Запоминаем столбцы, с которыми будем работать
for ( size_t i = 0 ; i < keys_size ; + + i )
2014-03-04 11:26:55 +00:00
{
2014-03-04 11:26:55 +00:00
key_columns [ i ] = block . getByPosition ( i ) . column ;
data_types [ i ] = block . getByPosition ( i ) . type ;
2015-03-15 07:56:46 +00:00
if ( key_columns [ i ] - > isConst ( ) )
{
materialized_columns . emplace_back ( static_cast < const IColumnConst * > ( key_columns [ i ] ) - > convertToFullColumn ( ) ) ;
key_columns [ i ] = materialized_columns . back ( ) . get ( ) ;
}
2014-03-04 11:26:55 +00:00
}
2012-08-23 20:22:44 +00:00
2014-03-04 11:26:55 +00:00
size_t rows = block . rows ( ) ;
2014-03-04 14:19:32 +00:00
2014-03-04 11:26:55 +00:00
/// Какую структуру данных для множества использовать?
2014-05-01 15:02:36 +00:00
if ( empty ( ) )
2015-03-02 01:11:37 +00:00
data . init ( data . chooseMethod ( key_columns , key_sizes ) ) ;
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 20:00:39 +00:00
insertFromBlockImpl ( * data . NAME , key_columns , rows , data ) ;
2015-03-02 05:41:21 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
else
2015-03-02 01:11:37 +00:00
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2012-08-23 23:49:28 +00:00
2015-03-02 01:11:37 +00:00
if ( create_ordered_set )
2014-03-04 11:26:55 +00:00
for ( size_t i = 0 ; i < rows ; + + i )
2015-03-02 05:41:21 +00:00
ordered_set_elements - > push_back ( ( * key_columns [ 0 ] ) [ i ] ) ; /// ordered_set для индекса работает только если IN одному ключу.
2014-03-04 14:19:32 +00:00
2014-03-04 11:26:55 +00:00
if ( ! checkSetSizeLimits ( ) )
2012-08-23 23:49:28 +00:00
{
2014-03-04 11:26:55 +00:00
if ( overflow_mode = = OverflowMode : : THROW )
2015-03-02 01:11:37 +00:00
throw Exception ( " IN-set size exceeded. "
" Rows: " + toString ( data . getTotalRowCount ( ) ) +
2014-03-04 11:26:55 +00:00
" , limit: " + toString ( max_rows ) +
2015-03-02 01:11:37 +00:00
" . Bytes: " + toString ( data . getTotalByteCount ( ) ) +
2014-03-04 11:26:55 +00:00
" , limit: " + toString ( max_bytes ) + " . " ,
ErrorCodes : : SET_SIZE_LIMIT_EXCEEDED ) ;
if ( overflow_mode = = OverflowMode : : BREAK )
return false ;
throw Exception ( " Logical error: unknown overflow mode " , ErrorCodes : : LOGICAL_ERROR ) ;
2012-08-23 23:49:28 +00:00
}
2014-03-04 11:26:55 +00:00
return true ;
2012-08-23 20:22:44 +00:00
}
2015-10-08 21:22:49 +00:00
/** Проверка попадания Field from, имеющим тип From в диапазон значений типа To.
* From и To - ч и с л о в ы е т и п ы . М о г у т б ы т ь т и п а м и с п л а в а ю щ е й з а п я т о й .
* From - э т о о д н о и з UInt64 , Int64 , Float64 ,
* т о г д а к а к To м о ж е т б ы т ь т а к ж е 8 , 16 , 32 б и т н ы м .
*
* Е с л и п о п а д а е т в д и а п а з о н , т о from к о н в е р т и р у е т с я в Field б л и ж а й ш е г о к To т и п а .
* Е с л и н е п о п а д а е т - в о з в р а щ а е т с я Field ( Null ) .
2014-10-06 20:33:12 +00:00
*/
2015-10-08 21:22:49 +00:00
template < typename From , typename To >
static Field convertNumericTypeImpl ( const Field & from )
2014-10-06 20:33:12 +00:00
{
2015-10-08 21:22:49 +00:00
From value = from . get < From > ( ) ;
2014-10-06 20:33:12 +00:00
2015-10-12 04:27:02 +00:00
if ( static_cast < long double > ( value ) ! = static_cast < long double > ( To ( value ) ) )
2015-10-08 21:22:49 +00:00
return { } ;
2014-10-06 20:33:12 +00:00
2015-10-08 21:22:49 +00:00
return Field ( typename NearestFieldType < To > : : Type ( value ) ) ;
}
2014-10-07 22:20:32 +00:00
2015-10-08 21:22:49 +00:00
/** Dispatch on the numeric type stored in `from` and try to convert the value to To.
  * `type` is only used to build the error message.
  * Throws TYPE_MISMATCH if `from` holds neither UInt64, Int64 nor Float64.
  */
template <typename To>
static Field convertNumericType(const Field & from, const IDataType & type)
{
    const auto stored = from.getType();

    if (stored == Field::Types::UInt64)
        return convertNumericTypeImpl<UInt64, To>(from);

    if (stored == Field::Types::Int64)
        return convertNumericTypeImpl<Int64, To>(from);

    if (stored == Field::Types::Float64)
        return convertNumericTypeImpl<Float64, To>(from);

    throw Exception(" Type mismatch in IN section: " + type.getName() + " at left, "
        + Field::Types::toString(from.getType()) + " at right ", ErrorCodes::TYPE_MISMATCH);
}
/** Makes expressions like 1.0 IN (1) work correctly, and makes 1 IN (1, 2.0, 2.5, -1) behave like 1 IN (1, 2).
  * Checks that the types are compatible, checks that the value fits into the range of the type,
  *  and performs the type conversion.
  * If the value does not fit into the range, Null is returned.
  */
static Field convertToType(const Field & src, const IDataType & type)
{
    const auto src_type = src.getType();

    /// Non-numeric left side: only reject obviously incompatible literals, no conversion.
    if (!type.isNumeric())
    {
        const bool string_vs_non_string = src_type == Field::Types::String
            && !typeid_cast<const DataTypeString *>(&type)
            && !typeid_cast<const DataTypeFixedString *>(&type);

        const bool array_vs_non_array = src_type == Field::Types::Array
            && !typeid_cast<const DataTypeArray *>(&type);

        if (src_type == Field::Types::UInt64
            || src_type == Field::Types::Int64
            || src_type == Field::Types::Float64
            || src_type == Field::Types::Null
            || string_vs_non_string
            || array_vs_non_array)
            throw Exception(" Type mismatch in IN section: " + type.getName() + " at left, "
                + Field::Types::toString(src.getType()) + " at right ", ErrorCodes::TYPE_MISMATCH);

        return src;
    }

    if (typeid_cast<const DataTypeUInt8 *>(&type))
        return convertNumericType<UInt8>(src, type);
    if (typeid_cast<const DataTypeUInt16 *>(&type))
        return convertNumericType<UInt16>(src, type);
    if (typeid_cast<const DataTypeUInt32 *>(&type))
        return convertNumericType<UInt32>(src, type);
    if (typeid_cast<const DataTypeUInt64 *>(&type))
        return convertNumericType<UInt64>(src, type);
    if (typeid_cast<const DataTypeInt8 *>(&type))
        return convertNumericType<Int8>(src, type);
    if (typeid_cast<const DataTypeInt16 *>(&type))
        return convertNumericType<Int16>(src, type);
    if (typeid_cast<const DataTypeInt32 *>(&type))
        return convertNumericType<Int32>(src, type);
    if (typeid_cast<const DataTypeInt64 *>(&type))
        return convertNumericType<Int64>(src, type);
    if (typeid_cast<const DataTypeFloat32 *>(&type))
        return convertNumericType<Float32>(src, type);
    if (typeid_cast<const DataTypeFloat64 *>(&type))
        return convertNumericType<Float64>(src, type);

    /// The remaining numeric types handled here are Date and DateTime.
    const bool is_date = nullptr != typeid_cast<const DataTypeDate *>(&type);
    const bool is_datetime = nullptr != typeid_cast<const DataTypeDateTime *>(&type);

    if (!is_date && !is_datetime)
        throw Exception(" Logical error: unknown numeric type " + type.getName(), ErrorCodes::LOGICAL_ERROR);

    if (src_type == Field::Types::UInt64)
        return src;

    if (src_type != Field::Types::String)
        throw Exception(" Type mismatch in IN section: " + type.getName() + " at left, "
            + Field::Types::toString(src.getType()) + " at right ", ErrorCodes::TYPE_MISMATCH);

    /// Allow comparing dates and dates-with-time against a string.
    const String & str = src.get<const String &>();
    ReadBufferFromString in(str);

    if (is_date)
    {
        DayNum_t date{};
        readDateText(date, in);

        /// The whole string must have been consumed by the parser.
        if (!in.eof())
            throw Exception(" String is too long for Date: " + str);

        return Field(UInt64(date));
    }

    time_t date_time{};
    readDateTimeText(date_time, in);

    /// The whole string must have been consumed by the parser.
    if (!in.eof())
        throw Exception(" String is too long for DateTime: " + str);

    return Field(UInt64(date_time));
}
2015-06-12 05:18:47 +00:00
/** Evaluate a constant expression (for an element of the set in IN). Rather inefficient.
  * Throws BAD_ARGUMENTS if the expression does not evaluate to a constant column.
  */
static Field evaluateConstantExpression(ASTPtr & node, const Context & context)
{
    ExpressionActionsPtr const_actions = ExpressionAnalyzer(
        node, context, nullptr, NamesAndTypesList{{" _dummy ", new DataTypeUInt8}}).getConstActions();

    /// The block needs at least one column so that its number of rows is known.
    Block sample{{new ColumnConstUInt8(1, 0), new DataTypeUInt8, " _dummy "}};

    const_actions->execute(sample);

    if (!sample || sample.rows() == 0)
        throw Exception(" Logical error: empty block after evaluation constant expression for IN ", ErrorCodes::LOGICAL_ERROR);

    const String name = node->getColumnName();

    if (!sample.has(name))
        throw Exception(" Element of set in IN is not a constant expression: " + name, ErrorCodes::BAD_ARGUMENTS);

    const IColumn & evaluated = *sample.getByName(name).column;

    if (!evaluated.isConst())
        throw Exception(" Element of set in IN is not a constant expression: " + name, ErrorCodes::BAD_ARGUMENTS);

    return evaluated[0];
}
/** Obtain the value of one set element and convert it to `type`.
  * Literals are used as is; function nodes are evaluated as constant expressions.
  * Any other node kind raises INCORRECT_ELEMENT_OF_SET.
  */
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
{
    if (const ASTLiteral * literal = typeid_cast<ASTLiteral *>(node.get()))
        return convertToType(literal->value, type);

    if (!typeid_cast<ASTFunction *>(node.get()))
        throw Exception(" Incorrect element of set. Must be literal or constant expression. ", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

    return convertToType(evaluateConstantExpression(node, context), type);
}
2014-10-06 20:33:12 +00:00
2015-06-12 05:18:47 +00:00
void Set : : createFromAST ( DataTypes & types , ASTPtr node , const Context & context , bool create_ordered_set )
{
2012-08-24 19:42:03 +00:00
data_types = types ;
/// Засунем множество в блок.
Block block ;
for ( size_t i = 0 , size = data_types . size ( ) ; i < size ; + + i )
{
2015-07-17 01:27:35 +00:00
ColumnWithTypeAndName col ;
2012-08-24 19:42:03 +00:00
col . type = data_types [ i ] ;
col . column = data_types [ i ] - > createColumn ( ) ;
2013-06-21 20:34:19 +00:00
col . name = " _ " + toString ( i ) ;
2012-08-24 19:42:03 +00:00
block . insert ( col ) ;
}
2014-06-26 00:58:14 +00:00
ASTExpressionList & list = typeid_cast < ASTExpressionList & > ( * node ) ;
2012-08-24 19:42:03 +00:00
for ( ASTs : : iterator it = list . children . begin ( ) ; it ! = list . children . end ( ) ; + + it )
{
if ( data_types . size ( ) = = 1 )
{
2015-10-08 21:22:49 +00:00
Field value = extractValueFromNode ( * it , * data_types [ 0 ] , context ) ;
if ( ! value . isNull ( ) )
block . getByPosition ( 0 ) . column - > insert ( value ) ;
2012-08-24 19:42:03 +00:00
}
2014-06-26 00:58:14 +00:00
else if ( ASTFunction * func = typeid_cast < ASTFunction * > ( & * * it ) )
2012-08-24 19:42:03 +00:00
{
if ( func - > name ! = " tuple " )
throw Exception ( " Incorrect element of set. Must be tuple. " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
size_t tuple_size = func - > arguments - > children . size ( ) ;
if ( tuple_size ! = data_types . size ( ) )
throw Exception ( " Incorrect size of tuple in set. " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
2014-06-26 00:58:14 +00:00
2012-08-24 19:42:03 +00:00
for ( size_t j = 0 ; j < tuple_size ; + + j )
{
2015-10-08 21:22:49 +00:00
Field value = extractValueFromNode ( func - > arguments - > children [ j ] , * data_types [ j ] , context ) ;
if ( ! value . isNull ( ) )
block . getByPosition ( j ) . column - > insert ( value ) ;
2012-08-24 19:42:03 +00:00
}
}
else
throw Exception ( " Incorrect element of set " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
}
2014-04-01 10:09:22 +00:00
if ( create_ordered_set )
2014-04-08 12:54:32 +00:00
ordered_set_elements = OrderedSetElementsPtr ( new OrderedSetElements ( ) ) ;
2014-04-01 10:09:22 +00:00
insertFromBlock ( block , create_ordered_set ) ;
if ( create_ordered_set )
2014-04-08 12:54:32 +00:00
std : : sort ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) ) ;
2012-08-24 19:42:03 +00:00
}
2015-10-08 03:41:11 +00:00
/** For every row of `block` compute whether its key is in the set (or is not, if `negative`).
  *
  * The result is a new ColumnUInt8 with as many rows as the first column of the block.
  * If the first column has an array type, the row result is computed over the array's elements.
  * The read lock is held while the set is consulted.
  *
  * Throws LOGICAL_ERROR for a block without columns, NUMBER_OF_COLUMNS_DOESNT_MATCH / TYPE_MISMATCH
  * when the block does not match the set's key types, ILLEGAL_COLUMN for an unexpected array column.
  */
ColumnPtr Set::execute(const Block & block, bool negative) const
{
    const size_t num_key_columns = block.columns();

    if (0 == num_key_columns)
        throw Exception(" Logical error: no columns passed to Set::execute method. ", ErrorCodes::LOGICAL_ERROR);

    ColumnUInt8 * p_res = new ColumnUInt8;
    ColumnPtr res = p_res;
    ColumnUInt8::Container_t & vec_res = p_res->getData();
    vec_res.resize(block.getByPosition(0).column->size());

    Poco::ScopedReadRWLock lock(rwlock);

    /// If the set is empty, the answer is the same for every row.
    if (data_types.empty())
    {
        memset(&vec_res[0], negative ? 1 : 0, vec_res.size());
        return res;
    }

    const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(&*block.getByPosition(0).type);

    if (!array_type)
    {
        if (data_types.size() != num_key_columns)
            throw Exception(" Number of columns in section IN doesn't match. ", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

        /// Remember the columns we are going to work with. Also check that the data types are correct.
        ConstColumnPlainPtrs key_columns(num_key_columns);

        for (size_t pos = 0; pos < num_key_columns; ++pos)
        {
            key_columns[pos] = block.getByPosition(pos).column;

            if (data_types[pos]->getName() != block.getByPosition(pos).type->getName())
                throw Exception(" Types of column " + toString(pos + 1) + " in section IN don't match: "
                    + data_types[pos]->getName() + " on the right, " + block.getByPosition(pos).type->getName() + " on the left. ",
                    ErrorCodes::TYPE_MISMATCH);
        }

        /// Constant columns on the left side of IN are not supported directly; they are materialized first.
        Columns materialized_columns;

        for (size_t pos = 0; pos < num_key_columns; ++pos)
        {
            if (!key_columns[pos]->isConst())
                continue;

            materialized_columns.emplace_back(static_cast<const IColumnConst *>(key_columns[pos])->convertToFullColumn());
            key_columns[pos] = materialized_columns.back().get();
        }

        executeOrdinary(key_columns, vec_res, negative);
        return res;
    }

    /// Array on the left side: exactly one key whose type equals the nested type of the array.
    if (data_types.size() != 1 || num_key_columns != 1)
        throw Exception(" Number of columns in section IN doesn't match. ", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

    if (array_type->getNestedType()->getName() != data_types[0]->getName())
        throw Exception(std::string() + " Types in section IN don't match: " + data_types[0]->getName() + " on the right, " + array_type->getNestedType()->getName() + " on the left. ", ErrorCodes::TYPE_MISMATCH);

    const IColumn * in_column = &*block.getByPosition(0).column;

    /// A constant column on the left side of IN is not supported directly; it is materialized first.
    ColumnPtr materialized_column;
    if (in_column->isConst())
    {
        materialized_column = static_cast<const IColumnConst *>(in_column)->convertToFullColumn();
        in_column = materialized_column.get();
    }

    const ColumnArray * array_column = typeid_cast<const ColumnArray *>(in_column);

    if (!array_column)
        throw Exception(" Unexpected array column type: " + in_column->getName(), ErrorCodes::ILLEGAL_COLUMN);

    executeArray(array_column, vec_res, negative);
    return res;
}
2012-08-23 22:40:51 +00:00
2015-03-02 01:11:37 +00:00
template < typename Method >
2015-03-02 01:39:42 +00:00
void NO_INLINE Set : : executeImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
ColumnUInt8 : : Container_t & vec_res ,
bool negative ,
2015-03-03 20:00:39 +00:00
size_t rows ) const
2013-03-19 12:25:59 +00:00
{
2015-03-02 01:11:37 +00:00
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
2012-08-23 20:35:05 +00:00
2015-03-02 01:11:37 +00:00
/// NOTE Н е используется оптимизация для подряд идущих одинаковых значений.
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
/// Для всех строчек
for ( size_t i = 0 ; i < rows ; + + i )
2012-08-23 20:35:05 +00:00
{
2015-03-02 01:11:37 +00:00
/// Строим ключ
2015-03-03 20:00:39 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , i , key_sizes ) ;
2015-03-02 01:11:37 +00:00
vec_res [ i ] = negative ^ ( method . data . end ( ) ! = method . data . find ( key ) ) ;
}
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
template < typename Method >
2015-03-02 01:39:42 +00:00
void NO_INLINE Set : : executeArrayImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
const ColumnArray : : Offsets_t & offsets ,
ColumnUInt8 : : Container_t & vec_res ,
bool negative ,
2015-03-03 20:00:39 +00:00
size_t rows ) const
2015-03-02 01:11:37 +00:00
{
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
size_t prev_offset = 0 ;
/// Для всех строчек
for ( size_t i = 0 ; i < rows ; + + i )
{
UInt8 res = 0 ;
/// Для всех элементов
for ( size_t j = prev_offset ; j < offsets [ i ] ; + + j )
2012-08-24 19:42:03 +00:00
{
2015-03-02 01:11:37 +00:00
/// Строим ключ
2015-03-03 21:11:54 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , j , key_sizes ) ;
2015-03-02 01:11:37 +00:00
res | = negative ^ ( method . data . end ( ) ! = method . data . find ( key ) ) ;
if ( res )
break ;
2012-08-24 19:42:03 +00:00
}
2015-03-02 01:11:37 +00:00
vec_res [ i ] = res ;
prev_offset = offsets [ i ] ;
2012-08-23 20:35:05 +00:00
}
2015-03-02 01:11:37 +00:00
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
void Set : : executeOrdinary ( const ConstColumnPlainPtrs & key_columns , ColumnUInt8 : : Container_t & vec_res , bool negative ) const
{
size_t rows = key_columns [ 0 ] - > size ( ) ;
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 20:00:39 +00:00
executeImpl ( * data . NAME , key_columns , vec_res , negative , rows ) ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
2012-08-23 20:35:05 +00:00
else
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2012-08-23 20:22:44 +00:00
}
2013-03-25 13:02:12 +00:00
void Set : : executeArray ( const ColumnArray * key_column , ColumnUInt8 : : Container_t & vec_res , bool negative ) const
{
size_t rows = key_column - > size ( ) ;
const ColumnArray : : Offsets_t & offsets = key_column - > getOffsets ( ) ;
const IColumn & nested_column = key_column - > getData ( ) ;
2015-03-02 01:11:37 +00:00
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 21:11:54 +00:00
executeArrayImpl ( * data . NAME , ConstColumnPlainPtrs { & nested_column } , offsets , vec_res , negative , rows ) ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
2013-03-25 13:02:12 +00:00
else
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
}
2013-03-19 12:25:59 +00:00
2015-03-27 03:37:46 +00:00
/** Using the sorted ordered_set_elements, tell whether `key IN set` can be true and/or
  * can be false for keys lying inside `range`.
  *
  * Returns BoolMask(can_be_true, can_be_false).
  * Throws if the ordered set has not been created.
  */
BoolMask Set::mayBeTrueInRange(const Range & range) const
{
    if (!ordered_set_elements)
        throw DB::Exception(" Ordered set in not created. ");

    if (ordered_set_elements->empty())
        return BoolMask(false, true);

    const Field & left = range.left;
    const Field & right = range.right;

    bool can_be_true = false;
    bool can_be_false = true;

    /// If the whole range consists of a single key and that key is in the Set,
    /// the block is selected for `in` and not selected for `notIn`.
    if (range.left_bounded && range.right_bounded && range.right_included && range.left_included && left == right)
    {
        if (std::binary_search(ordered_set_elements->begin(), ordered_set_elements->end(), left))
        {
            can_be_false = false;
            can_be_true = true;
        }
        else
        {
            can_be_true = false;
            can_be_false = true;
        }
    }
    else
    {
        /// First set element that is inside the range from the left.
        auto left_it = range.left_bounded
            ? std::lower_bound(ordered_set_elements->begin(), ordered_set_elements->end(), left)
            : ordered_set_elements->begin();

        if (range.left_bounded && !range.left_included && left_it != ordered_set_elements->end() && *left_it == left)
            ++left_it;

        /// If left_it is end(), the whole range lies to the right of the set elements
        /// and can_be_true stays false.
        if (left_it != ordered_set_elements->end())
        {
            /// One past the last set element that is <= right.
            auto right_it = range.right_bounded
                ? std::upper_bound(ordered_set_elements->begin(), ordered_set_elements->end(), right)
                : ordered_set_elements->end();

            /// For a right-open range, an element equal to `right` must be excluded.
            /// That element, if present, is the one just BEFORE right_it, so look at it without
            /// moving the iterator: dereferencing right_it itself could read end(), and
            /// decrementing it unconditionally would drop a valid element smaller than `right`.
            if (range.right_bounded && !range.right_included && right_it != ordered_set_elements->begin() && *(right_it - 1) == right)
                --right_it;

            /// If right_it is begin(), the whole range lies to the left of the set elements
            /// and can_be_true stays false.
            if (right_it != ordered_set_elements->begin())
            {
                --right_it;

                /// If the last candidate precedes the first one, no key of the set falls into the range.
                can_be_true = !(*right_it < *left_it);
            }
        }
    }

    return BoolMask(can_be_true, can_be_false);
}
2015-10-12 07:05:54 +00:00
std : : string Set : : describe ( ) const
{
if ( ! ordered_set_elements )
return " {} " ;
bool first = true ;
std : : stringstream ss ;
ss < < " { " ;
for ( const Field & f : * ordered_set_elements )
{
ss < < ( first ? " " : " , " ) < < apply_visitor ( FieldVisitorToString ( ) , f ) ;
first = false ;
}
ss < < " } " ;
return ss . str ( ) ;
}
2012-08-23 20:22:44 +00:00
}