2012-08-23 20:22:44 +00:00
# include <DB/Core/Field.h>
2015-10-12 07:05:54 +00:00
# include <DB/Core/FieldVisitors.h>
2016-11-20 12:43:20 +00:00
# include <DB/Core/Row.h>
2012-08-23 20:22:44 +00:00
# include <DB/Columns/ColumnString.h>
# include <DB/Columns/ColumnFixedString.h>
# include <DB/Columns/ColumnsNumber.h>
2016-07-10 11:49:33 +00:00
# include <DB/Columns/ColumnTuple.h>
2012-08-23 20:22:44 +00:00
2012-08-23 23:49:28 +00:00
# include <DB/DataStreams/IProfilingBlockInputStream.h>
2012-08-24 19:42:03 +00:00
# include <DB/DataStreams/OneBlockInputStream.h>
2016-02-13 06:37:19 +00:00
# include <DB/DataTypes/DataTypeArray.h>
2012-08-24 19:42:03 +00:00
# include <DB/Parsers/ASTExpressionList.h>
# include <DB/Parsers/ASTFunction.h>
# include <DB/Parsers/ASTLiteral.h>
2012-08-23 23:49:28 +00:00
2012-08-23 20:22:44 +00:00
# include <DB/Interpreters/Set.h>
2016-02-13 06:37:19 +00:00
# include <DB/Interpreters/convertFieldToType.h>
# include <DB/Interpreters/evaluateConstantExpression.h>
2012-08-23 20:22:44 +00:00
namespace DB
{
2014-06-26 00:58:14 +00:00
2016-01-12 02:21:15 +00:00
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int UNKNOWN_SET_DATA_VARIANT;
    extern const int LOGICAL_ERROR;
    extern const int SET_SIZE_LIMIT_EXCEEDED;
    extern const int TYPE_MISMATCH;
    extern const int INCORRECT_ELEMENT_OF_SET;
    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
    /// Used by Set::execute; declared explicitly so this file does not depend on
    /// some included header happening to declare it.
    extern const int ILLEGAL_COLUMN;
}
2015-03-02 01:11:37 +00:00
void SetVariants : : init ( Type type_ )
2013-06-20 12:12:27 +00:00
{
2015-03-02 01:11:37 +00:00
type = type_ ;
switch ( type )
{
case Type : : EMPTY : break ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2016-09-23 20:33:06 +00:00
case Type : : NAME : NAME = std : : make_unique < decltype ( NAME ) : : element_type > ( ) ; break ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
2016-01-12 02:21:15 +00:00
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2015-03-02 01:11:37 +00:00
}
}
size_t SetVariants : : getTotalRowCount ( ) const
{
switch ( type )
{
case Type : : EMPTY : return 0 ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : return NAME - > data . size ( ) ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
2016-01-12 02:21:15 +00:00
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2015-03-02 01:11:37 +00:00
}
2013-06-20 12:12:27 +00:00
}
2015-03-02 01:11:37 +00:00
size_t SetVariants : : getTotalByteCount ( ) const
2013-06-20 12:12:27 +00:00
{
2015-03-02 01:11:37 +00:00
switch ( type )
{
case Type : : EMPTY : return 0 ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : return NAME - > data . getBufferSizeInBytes ( ) ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
2016-01-12 02:21:15 +00:00
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2015-03-02 01:11:37 +00:00
}
}
2013-06-20 12:12:27 +00:00
bool Set : : checkSetSizeLimits ( ) const
{
2015-03-02 01:11:37 +00:00
if ( max_rows & & data . getTotalRowCount ( ) > max_rows )
2013-06-20 12:12:27 +00:00
return false ;
2015-03-02 01:11:37 +00:00
if ( max_bytes & & data . getTotalByteCount ( ) > max_bytes )
2013-06-20 12:12:27 +00:00
return false ;
return true ;
}
2014-06-26 00:58:14 +00:00
2012-08-23 20:22:44 +00:00
2015-03-02 01:11:37 +00:00
/// Picks the set variant to use for the given key columns.
///
/// `key_sizes` is resized to the number of keys and filled with `sizeOfField()`
/// for the leading run of fixed-size columns (filling stops at the first
/// column that is not fixed).
///
/// Selection order:
///  1. a single numeric key                 -> key8 / key16 / key32 / key64 by field size;
///  2. all keys fixed, at most 16 bytes     -> keys128;
///  3. all keys fixed, at most 32 bytes     -> keys256;
///  4. a single (possibly const) string key -> key_string;
///  5. a single fixed string key            -> key_fixed_string;
///  6. anything else                        -> hashed.
///
/// Throws LOGICAL_ERROR if a numeric column reports a field size other than 1, 2, 4 or 8.
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
{
    const size_t num_keys = key_columns.size();

    key_sizes.resize(num_keys);

    bool every_key_is_fixed = true;
    size_t total_fixed_bytes = 0;

    /// The loop ends as soon as a key that is not fixed-size is met.
    for (size_t pos = 0; pos < num_keys && every_key_is_fixed; ++pos)
    {
        if (key_columns[pos]->isFixed())
        {
            key_sizes[pos] = key_columns[pos]->sizeOfField();
            total_fixed_bytes += key_sizes[pos];
        }
        else
            every_key_is_fixed = false;
    }

    /// If there is a single numeric key that fits into 64 bits.
    if (num_keys == 1 && key_columns[0]->isNumeric())
    {
        switch (key_columns[0]->sizeOfField())
        {
            case 1: return SetVariants::Type::key8;
            case 2: return SetVariants::Type::key16;
            case 4: return SetVariants::Type::key32;
            case 8: return SetVariants::Type::key64;
            default:
                throw Exception(" Logical error: numeric column has sizeOfField not in 1, 2, 4, 8. ", ErrorCodes::LOGICAL_ERROR);
        }
    }

    /// If the keys fit into N bits, use a hash table over keys packed into N bits.
    if (every_key_is_fixed)
    {
        if (total_fixed_bytes <= 16)
            return SetVariants::Type::keys128;
        if (total_fixed_bytes <= 32)
            return SetVariants::Type::keys256;
    }

    if (num_keys == 1)
    {
        const IColumn * only_key = key_columns[0];

        /// If there is a single string key, use a hash table of its values.
        if (typeid_cast<const ColumnString *>(only_key) || typeid_cast<const ColumnConstString *>(only_key))
            return SetVariants::Type::key_string;

        if (typeid_cast<const ColumnFixedString *>(only_key))
            return SetVariants::Type::key_fixed_string;
    }

    /// Otherwise, use a set of cryptographic hashes of unambiguously serialized values.
    return SetVariants::Type::hashed;
}
template < typename Method >
2015-03-02 05:41:21 +00:00
void NO_INLINE Set : : insertFromBlockImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
size_t rows ,
SetVariants & variants )
{
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
/// Для всех строчек
2015-03-02 05:41:21 +00:00
for ( size_t i = 0 ; i < rows ; + + i )
2015-03-02 01:11:37 +00:00
{
/// Строим ключ
2015-03-03 20:00:39 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , i , key_sizes ) ;
2015-03-02 01:11:37 +00:00
typename Method : : Data : : iterator it = method . data . find ( key ) ;
bool inserted ;
method . data . emplace ( key , it , inserted ) ;
if ( inserted )
2015-03-03 20:00:39 +00:00
method . onNewKey ( * it , keys_size , i , variants . string_pool ) ;
2015-03-02 01:11:37 +00:00
}
2012-08-23 20:22:44 +00:00
}
2015-01-27 00:52:03 +00:00
bool Set : : insertFromBlock ( const Block & block , bool create_ordered_set )
2012-08-23 20:22:44 +00:00
{
2015-01-27 00:52:03 +00:00
Poco : : ScopedWriteRWLock lock ( rwlock ) ;
2014-03-04 11:26:55 +00:00
size_t keys_size = block . columns ( ) ;
2016-07-10 11:49:33 +00:00
ConstColumnPlainPtrs key_columns ;
key_columns . reserve ( keys_size ) ;
2016-07-10 18:53:27 +00:00
if ( empty ( ) )
{
data_types . clear ( ) ;
data_types . reserve ( keys_size ) ;
}
2014-03-04 11:26:55 +00:00
2015-03-15 07:56:46 +00:00
/// Константные столбцы справа от IN поддерживается не напрямую. Для этого, они сначала материализуется.
Columns materialized_columns ;
2014-03-04 11:26:55 +00:00
/// Запоминаем столбцы, с которыми будем работать
for ( size_t i = 0 ; i < keys_size ; + + i )
2014-03-04 11:26:55 +00:00
{
2016-07-10 11:49:33 +00:00
key_columns . emplace_back ( block . getByPosition ( i ) . column . get ( ) ) ;
2016-07-10 18:53:27 +00:00
if ( empty ( ) )
data_types . emplace_back ( block . getByPosition ( i ) . type ) ;
2015-03-15 07:56:46 +00:00
2016-07-10 11:49:33 +00:00
if ( auto converted = key_columns . back ( ) - > convertToFullColumnIfConst ( ) )
2015-03-15 07:56:46 +00:00
{
2015-10-28 21:21:18 +00:00
materialized_columns . emplace_back ( converted ) ;
2016-07-10 11:49:33 +00:00
key_columns . back ( ) = materialized_columns . back ( ) . get ( ) ;
}
/** Flatten tuples. For case when written
* ( a , b ) IN ( SELECT ( a , b ) FROM table )
* instead of more typical
* ( a , b ) IN ( SELECT a , b FROM table )
*/
if ( const ColumnTuple * tuple = typeid_cast < const ColumnTuple * > ( key_columns . back ( ) ) )
{
key_columns . pop_back ( ) ;
const Columns & tuple_elements = tuple - > getColumns ( ) ;
for ( const auto & elem : tuple_elements )
key_columns . push_back ( elem . get ( ) ) ;
2016-07-10 18:53:27 +00:00
if ( empty ( ) )
{
data_types . pop_back ( ) ;
const Block & tuple_block = tuple - > getData ( ) ;
for ( size_t i = 0 , size = tuple_block . columns ( ) ; i < size ; + + i )
data_types . push_back ( tuple_block . unsafeGetByPosition ( i ) . type ) ;
}
2015-03-15 07:56:46 +00:00
}
2014-03-04 11:26:55 +00:00
}
2012-08-23 20:22:44 +00:00
2014-03-04 11:26:55 +00:00
size_t rows = block . rows ( ) ;
2014-03-04 14:19:32 +00:00
2014-03-04 11:26:55 +00:00
/// Какую структуру данных для множества использовать?
2014-05-01 15:02:36 +00:00
if ( empty ( ) )
2015-03-02 01:11:37 +00:00
data . init ( data . chooseMethod ( key_columns , key_sizes ) ) ;
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 20:00:39 +00:00
insertFromBlockImpl ( * data . NAME , key_columns , rows , data ) ;
2015-03-02 05:41:21 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
else
2015-03-02 01:11:37 +00:00
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2012-08-23 23:49:28 +00:00
2015-03-02 01:11:37 +00:00
if ( create_ordered_set )
2014-03-04 11:26:55 +00:00
for ( size_t i = 0 ; i < rows ; + + i )
2016-04-08 20:34:32 +00:00
ordered_set_elements - > push_back ( ( * key_columns [ 0 ] ) [ i ] ) ; /// ordered_set для индекса работает только если IN по одному ключу, а не кортажам
2014-03-04 14:19:32 +00:00
2014-03-04 11:26:55 +00:00
if ( ! checkSetSizeLimits ( ) )
2012-08-23 23:49:28 +00:00
{
2014-03-04 11:26:55 +00:00
if ( overflow_mode = = OverflowMode : : THROW )
2015-03-02 01:11:37 +00:00
throw Exception ( " IN-set size exceeded. "
" Rows: " + toString ( data . getTotalRowCount ( ) ) +
2014-03-04 11:26:55 +00:00
" , limit: " + toString ( max_rows ) +
2015-03-02 01:11:37 +00:00
" . Bytes: " + toString ( data . getTotalByteCount ( ) ) +
2014-03-04 11:26:55 +00:00
" , limit: " + toString ( max_bytes ) + " . " ,
ErrorCodes : : SET_SIZE_LIMIT_EXCEEDED ) ;
if ( overflow_mode = = OverflowMode : : BREAK )
return false ;
throw Exception ( " Logical error: unknown overflow mode " , ErrorCodes : : LOGICAL_ERROR ) ;
2012-08-23 23:49:28 +00:00
}
2014-03-04 11:26:55 +00:00
return true ;
2012-08-23 20:22:44 +00:00
}
2015-06-12 05:18:47 +00:00
/// Produces the value of a set element converted to `type`.
/// A literal is converted directly; a function node is first evaluated as a
/// constant expression. Any other node kind raises INCORRECT_ELEMENT_OF_SET.
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
{
    if (ASTLiteral * literal = typeid_cast<ASTLiteral *>(node.get()))
        return convertFieldToType(literal->value, type);

    if (typeid_cast<ASTFunction *>(node.get()))
        return convertFieldToType(evaluateConstantExpression(node, context), type);

    throw Exception(" Incorrect element of set. Must be literal or constant expression. ", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
}
2014-10-06 20:33:12 +00:00
2016-11-24 12:26:47 +00:00
void Set : : createFromAST ( const DataTypes & types , ASTPtr node , const Context & context , bool create_ordered_set )
2015-06-12 05:18:47 +00:00
{
2012-08-24 19:42:03 +00:00
data_types = types ;
/// Засунем множество в блок.
Block block ;
for ( size_t i = 0 , size = data_types . size ( ) ; i < size ; + + i )
{
2015-07-17 01:27:35 +00:00
ColumnWithTypeAndName col ;
2012-08-24 19:42:03 +00:00
col . type = data_types [ i ] ;
col . column = data_types [ i ] - > createColumn ( ) ;
2013-06-21 20:34:19 +00:00
col . name = " _ " + toString ( i ) ;
2012-08-24 19:42:03 +00:00
2016-08-04 23:35:07 +00:00
block . insert ( std : : move ( col ) ) ;
2012-08-24 19:42:03 +00:00
}
2016-03-22 00:09:08 +00:00
Row tuple_values ;
2014-06-26 00:58:14 +00:00
ASTExpressionList & list = typeid_cast < ASTExpressionList & > ( * node ) ;
2012-08-24 19:42:03 +00:00
for ( ASTs : : iterator it = list . children . begin ( ) ; it ! = list . children . end ( ) ; + + it )
{
if ( data_types . size ( ) = = 1 )
{
2015-10-08 21:22:49 +00:00
Field value = extractValueFromNode ( * it , * data_types [ 0 ] , context ) ;
if ( ! value . isNull ( ) )
block . getByPosition ( 0 ) . column - > insert ( value ) ;
2012-08-24 19:42:03 +00:00
}
2016-03-22 00:09:08 +00:00
else if ( ASTFunction * func = typeid_cast < ASTFunction * > ( it - > get ( ) ) )
2012-08-24 19:42:03 +00:00
{
if ( func - > name ! = " tuple " )
throw Exception ( " Incorrect element of set. Must be tuple. " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
size_t tuple_size = func - > arguments - > children . size ( ) ;
if ( tuple_size ! = data_types . size ( ) )
throw Exception ( " Incorrect size of tuple in set. " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
2014-06-26 00:58:14 +00:00
2016-03-22 00:09:08 +00:00
if ( tuple_values . empty ( ) )
tuple_values . resize ( tuple_size ) ;
size_t j = 0 ;
for ( ; j < tuple_size ; + + j )
2012-08-24 19:42:03 +00:00
{
2015-10-08 21:22:49 +00:00
Field value = extractValueFromNode ( func - > arguments - > children [ j ] , * data_types [ j ] , context ) ;
2016-03-22 00:09:08 +00:00
/// Если хотя бы один из элементов кортежа имеет невозможное (вне диапазона типа) значение, то и весь кортеж тоже.
if ( value . isNull ( ) )
break ;
tuple_values [ j ] = value ; /// TODO Сделать move семантику для Field.
2012-08-24 19:42:03 +00:00
}
2016-03-22 00:09:08 +00:00
if ( j = = tuple_size )
for ( j = 0 ; j < tuple_size ; + + j )
block . getByPosition ( j ) . column - > insert ( tuple_values [ j ] ) ;
2012-08-24 19:42:03 +00:00
}
else
throw Exception ( " Incorrect element of set " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
}
2014-04-01 10:09:22 +00:00
if ( create_ordered_set )
2014-04-08 12:54:32 +00:00
ordered_set_elements = OrderedSetElementsPtr ( new OrderedSetElements ( ) ) ;
2014-04-01 10:09:22 +00:00
insertFromBlock ( block , create_ordered_set ) ;
if ( create_ordered_set )
2016-04-08 20:34:32 +00:00
{
2014-04-08 12:54:32 +00:00
std : : sort ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) ) ;
2016-04-08 20:34:32 +00:00
ordered_set_elements - > erase ( std : : unique ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) ) , ordered_set_elements - > end ( ) ) ;
}
2012-08-24 19:42:03 +00:00
}
2015-10-08 03:41:11 +00:00
/// Evaluates IN (or NOT IN when `negative` is true) for every row of `block`.
///
/// Returns a UInt8 column with as many rows as the first column of `block`.
/// The set is read under a scoped read lock on `rwlock`.
///
/// If the first column has an array type, membership is checked for the array
/// elements (see executeArray); otherwise the columns of `block` are used as
/// the key columns (see executeOrdinary).
///
/// Throws:
///  - LOGICAL_ERROR if `block` has no columns;
///  - NUMBER_OF_COLUMNS_DOESNT_MATCH if the column count differs from the set's;
///  - TYPE_MISMATCH if a column type name differs from the set's element type name;
///  - ILLEGAL_COLUMN if an array-typed column is not a ColumnArray after materialization.
ColumnPtr Set::execute(const Block & block, bool negative) const
{
    size_t num_key_columns = block.columns();

    if (0 == num_key_columns)
        throw Exception(" Logical error: no columns passed to Set::execute method. ", ErrorCodes::LOGICAL_ERROR);

    auto res = std::make_shared<ColumnUInt8>();
    ColumnUInt8::Container_t & vec_res = res->getData();
    vec_res.resize(block.getByPosition(0).column->size());

    Poco::ScopedReadRWLock lock(rwlock);

    /// If the set is empty, the answer is the same for every row.
    if (data_types.empty())
    {
        /// Guard against a zero-row result: `&vec_res[0]` must not be formed for an
        /// empty container, and memset requires a valid pointer even for a zero length.
        const size_t num_rows = vec_res.size();
        if (num_rows != 0)
            memset(&vec_res[0], negative ? 1 : 0, num_rows);

        return res;
    }

    const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(block.getByPosition(0).type.get());

    if (array_type)
    {
        if (data_types.size() != 1 || num_key_columns != 1)
            throw Exception(" Number of columns in section IN doesn't match. ", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

        if (array_type->getNestedType()->getName() != data_types[0]->getName())
            throw Exception(std::string() + " Types in section IN don't match: " + data_types[0]->getName() + " on the right, " + array_type->getNestedType()->getName() + " on the left. ", ErrorCodes::TYPE_MISMATCH);

        const IColumn * in_column = block.getByPosition(0).column.get();

        /// A constant column on the left side of IN is not supported directly. It is materialized first.
        ColumnPtr materialized_column = in_column->convertToFullColumnIfConst();
        if (materialized_column)
            in_column = materialized_column.get();

        if (const ColumnArray * col = typeid_cast<const ColumnArray *>(in_column))
            executeArray(col, vec_res, negative);
        else
            throw Exception(" Unexpected array column type: " + in_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
    }
    else
    {
        if (data_types.size() != num_key_columns)
        {
            std::stringstream message;
            message << " Number of columns in section IN doesn't match. "
                << num_key_columns << " at left, " << data_types.size() << " at right. ";
            throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
        }

        /// Remember the columns we are going to work with. Also check that the data types are correct.
        ConstColumnPlainPtrs key_columns;
        key_columns.reserve(num_key_columns);

        /// Constant columns on the left side of IN are not supported directly.
        /// They are materialized first; this container keeps the materialized copies alive.
        Columns materialized_columns;

        for (size_t i = 0; i < num_key_columns; ++i)
        {
            key_columns.push_back(block.getByPosition(i).column.get());

            if (data_types[i]->getName() != block.getByPosition(i).type->getName())
                throw Exception(" Types of column " + toString(i + 1) + " in section IN don't match: "
                    + data_types[i]->getName() + " on the right, " + block.getByPosition(i).type->getName() + " on the left. ",
                    ErrorCodes::TYPE_MISMATCH);

            if (auto converted = key_columns.back()->convertToFullColumnIfConst())
            {
                materialized_columns.emplace_back(converted);
                key_columns.back() = materialized_columns.back().get();
            }
        }

        executeOrdinary(key_columns, vec_res, negative);
    }

    return res;
}
2012-08-23 22:40:51 +00:00
2015-03-02 01:11:37 +00:00
template < typename Method >
2015-03-02 01:39:42 +00:00
void NO_INLINE Set : : executeImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
ColumnUInt8 : : Container_t & vec_res ,
bool negative ,
2015-03-03 20:00:39 +00:00
size_t rows ) const
2013-03-19 12:25:59 +00:00
{
2015-03-02 01:11:37 +00:00
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
2012-08-23 20:35:05 +00:00
2015-03-02 01:11:37 +00:00
/// NOTE Н е используется оптимизация для подряд идущих одинаковых значений.
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
/// Для всех строчек
for ( size_t i = 0 ; i < rows ; + + i )
2012-08-23 20:35:05 +00:00
{
2015-03-02 01:11:37 +00:00
/// Строим ключ
2015-03-03 20:00:39 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , i , key_sizes ) ;
2015-03-02 01:11:37 +00:00
vec_res [ i ] = negative ^ ( method . data . end ( ) ! = method . data . find ( key ) ) ;
}
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
template < typename Method >
2015-03-02 01:39:42 +00:00
void NO_INLINE Set : : executeArrayImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
const ColumnArray : : Offsets_t & offsets ,
ColumnUInt8 : : Container_t & vec_res ,
bool negative ,
2015-03-03 20:00:39 +00:00
size_t rows ) const
2015-03-02 01:11:37 +00:00
{
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
size_t prev_offset = 0 ;
/// Для всех строчек
for ( size_t i = 0 ; i < rows ; + + i )
{
UInt8 res = 0 ;
/// Для всех элементов
for ( size_t j = prev_offset ; j < offsets [ i ] ; + + j )
2012-08-24 19:42:03 +00:00
{
2015-03-02 01:11:37 +00:00
/// Строим ключ
2015-03-03 21:11:54 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , j , key_sizes ) ;
2015-03-02 01:11:37 +00:00
res | = negative ^ ( method . data . end ( ) ! = method . data . find ( key ) ) ;
if ( res )
break ;
2012-08-24 19:42:03 +00:00
}
2015-03-02 01:11:37 +00:00
vec_res [ i ] = res ;
prev_offset = offsets [ i ] ;
2012-08-23 20:35:05 +00:00
}
2015-03-02 01:11:37 +00:00
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
void Set : : executeOrdinary ( const ConstColumnPlainPtrs & key_columns , ColumnUInt8 : : Container_t & vec_res , bool negative ) const
{
size_t rows = key_columns [ 0 ] - > size ( ) ;
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 20:00:39 +00:00
executeImpl ( * data . NAME , key_columns , vec_res , negative , rows ) ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
2012-08-23 20:35:05 +00:00
else
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2012-08-23 20:22:44 +00:00
}
2013-03-25 13:02:12 +00:00
void Set : : executeArray ( const ColumnArray * key_column , ColumnUInt8 : : Container_t & vec_res , bool negative ) const
{
size_t rows = key_column - > size ( ) ;
const ColumnArray : : Offsets_t & offsets = key_column - > getOffsets ( ) ;
const IColumn & nested_column = key_column - > getData ( ) ;
2015-03-02 01:11:37 +00:00
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 21:11:54 +00:00
executeArrayImpl ( * data . NAME , ConstColumnPlainPtrs { & nested_column } , offsets , vec_res , negative , rows ) ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
2013-03-25 13:02:12 +00:00
else
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
}
2013-03-19 12:25:59 +00:00
2016-04-08 20:34:32 +00:00
/// Возвращаем BoolMask.
/// Первый элемент - может ли в диапазоне range быть элемент множества.
/// Второй элемент - может ли в диапазоне range быть элемент не из множества.
2015-03-27 03:37:46 +00:00
BoolMask Set : : mayBeTrueInRange ( const Range & range ) const
2014-04-01 10:09:22 +00:00
{
2014-04-08 12:54:32 +00:00
if ( ! ordered_set_elements )
2016-04-08 23:36:34 +00:00
throw Exception ( " Ordered set in not created. " ) ;
2014-06-26 00:58:14 +00:00
2014-04-08 12:54:32 +00:00
if ( ordered_set_elements - > empty ( ) )
2016-04-08 20:34:32 +00:00
return { false , true } ;
/// Диапазон (-inf; +inf)
if ( ! range . left_bounded & & ! range . right_bounded )
return { true , true } ;
2014-04-01 10:09:22 +00:00
const Field & left = range . left ;
const Field & right = range . right ;
2016-04-08 20:34:32 +00:00
/// Диапазон (-inf; right|
if ( ! range . left_bounded )
{
if ( range . right_included )
return { ordered_set_elements - > front ( ) < = right , true } ;
else
return { ordered_set_elements - > front ( ) < right , true } ;
}
2014-04-01 10:09:22 +00:00
2016-04-08 20:34:32 +00:00
/// Диапазон |left; +inf)
if ( ! range . right_bounded )
2014-04-01 10:09:22 +00:00
{
2016-04-08 20:34:32 +00:00
if ( range . left_included )
return { ordered_set_elements - > back ( ) > = left , true } ;
2014-04-01 10:09:22 +00:00
else
2016-04-08 20:34:32 +00:00
return { ordered_set_elements - > back ( ) > left , true } ;
2014-04-01 10:09:22 +00:00
}
2016-04-08 20:34:32 +00:00
/// Диапазон из одного значения [left].
if ( range . left_included & & range . right_included & & left = = right )
2014-04-01 10:09:22 +00:00
{
2016-04-08 20:34:32 +00:00
if ( std : : binary_search ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) , left ) )
return { true , false } ;
else
return { false , true } ;
}
2015-03-27 03:37:46 +00:00
2016-04-08 20:34:32 +00:00
/// Первый элемент множества, который больше или равен left.
auto left_it = std : : lower_bound ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) , left ) ;
2014-04-01 10:09:22 +00:00
2016-04-08 20:34:32 +00:00
/// Если left не входит в диапазон (открытый диапазон), то возьмём следующий по порядку элемент множества.
if ( ! range . left_included & & left_it ! = ordered_set_elements - > end ( ) & & * left_it = = left )
+ + left_it ;
2015-03-27 03:37:46 +00:00
2016-04-08 20:34:32 +00:00
/// если весь диапазон правее множества: { set } | range |
if ( left_it = = ordered_set_elements - > end ( ) )
return { false , true } ;
2014-04-01 10:09:22 +00:00
2016-04-08 20:34:32 +00:00
/// Первый элемент множества, который строго больше right.
auto right_it = std : : upper_bound ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) , right ) ;
/// весь диапазон левее множества: | range | { set }
if ( right_it = = ordered_set_elements - > begin ( ) )
return { false , true } ;
/// Последний элемент множества, который меньше или равен right.
- - right_it ;
/// Если right не входит в диапазон (открытый диапазон), то возьмём предыдущий по порядку элемент множества.
if ( ! range . right_included & & * right_it = = right )
{
/// весь диапазон левее множества, хотя открытый диапазон касается множества: | range ){ set }
if ( right_it = = ordered_set_elements - > begin ( ) )
return { false , true } ;
- - right_it ;
2014-04-01 10:09:22 +00:00
}
2016-04-08 20:34:32 +00:00
/// В диапазон не попадает ни одного ключа из множества, хотя он расположен где-то посередине относительно е г о элементов: * * * * [ ] * * * *
if ( right_it < left_it )
return { false , true } ;
return { true , true } ;
2014-04-01 10:09:22 +00:00
}
2015-10-12 07:05:54 +00:00
std : : string Set : : describe ( ) const
{
if ( ! ordered_set_elements )
return " {} " ;
bool first = true ;
std : : stringstream ss ;
ss < < " { " ;
for ( const Field & f : * ordered_set_elements )
{
ss < < ( first ? " " : " , " ) < < apply_visitor ( FieldVisitorToString ( ) , f ) ;
first = false ;
}
ss < < " } " ;
return ss . str ( ) ;
}
2012-08-23 20:22:44 +00:00
}