2012-08-23 20:22:44 +00:00
# include <DB/Core/Field.h>
# include <DB/Columns/ColumnString.h>
# include <DB/Columns/ColumnFixedString.h>
# include <DB/Columns/ColumnsNumber.h>
2012-08-23 23:49:28 +00:00
# include <DB/DataStreams/IProfilingBlockInputStream.h>
2012-08-24 19:42:03 +00:00
# include <DB/DataStreams/OneBlockInputStream.h>
# include <DB/Parsers/ASTExpressionList.h>
# include <DB/Parsers/ASTFunction.h>
# include <DB/Parsers/ASTLiteral.h>
2012-08-23 23:49:28 +00:00
2012-08-23 20:22:44 +00:00
# include <DB/Interpreters/Set.h>
2015-06-12 05:18:47 +00:00
# include <DB/Interpreters/ExpressionAnalyzer.h>
# include <DB/Interpreters/ExpressionActions.h>
2013-03-19 12:25:59 +00:00
# include <DB/DataTypes/DataTypeArray.h>
2013-03-25 13:02:12 +00:00
# include <DB/DataTypes/DataTypesNumberFixed.h>
# include <DB/DataTypes/DataTypeString.h>
# include <DB/DataTypes/DataTypeFixedString.h>
2015-06-12 05:54:49 +00:00
# include <DB/DataTypes/DataTypeDate.h>
# include <DB/DataTypes/DataTypeDateTime.h>
2012-08-23 20:22:44 +00:00
namespace DB
{
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
void SetVariants : : init ( Type type_ )
2013-06-20 12:12:27 +00:00
{
2015-03-02 01:11:37 +00:00
type = type_ ;
switch ( type )
{
case Type : : EMPTY : break ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : NAME . reset ( new decltype ( NAME ) : : element_type ) ; break ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_AGGREGATED_DATA_VARIANT ) ;
}
}
size_t SetVariants : : getTotalRowCount ( ) const
{
switch ( type )
{
case Type : : EMPTY : return 0 ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : return NAME - > data . size ( ) ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_AGGREGATED_DATA_VARIANT ) ;
}
2013-06-20 12:12:27 +00:00
}
2015-03-02 01:11:37 +00:00
size_t SetVariants : : getTotalByteCount ( ) const
2013-06-20 12:12:27 +00:00
{
2015-03-02 01:11:37 +00:00
switch ( type )
{
case Type : : EMPTY : return 0 ;
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
case Type : : NAME : return NAME - > data . getBufferSizeInBytes ( ) ;
APPLY_FOR_SET_VARIANTS ( M )
# undef M
default :
throw Exception ( " Unknown Set variant. " , ErrorCodes : : UNKNOWN_AGGREGATED_DATA_VARIANT ) ;
}
}
2013-06-20 12:12:27 +00:00
bool Set : : checkSetSizeLimits ( ) const
{
2015-03-02 01:11:37 +00:00
if ( max_rows & & data . getTotalRowCount ( ) > max_rows )
2013-06-20 12:12:27 +00:00
return false ;
2015-03-02 01:11:37 +00:00
if ( max_bytes & & data . getTotalByteCount ( ) > max_bytes )
2013-06-20 12:12:27 +00:00
return false ;
return true ;
}
2014-06-26 00:58:14 +00:00
2012-08-23 20:22:44 +00:00
2015-03-02 01:11:37 +00:00
/** Pick the set variant to use for the given key columns.
  *
  * Fills `key_sizes` (resized to the number of keys) with the field width of each
  * key for as long as the keys are fixed-size; filling stops at the first key that is not.
  *
  * Returns the chosen variant. Throws LOGICAL_ERROR if a single numeric key reports
  * a field size other than 1, 2, 4 or 8.
  */
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
{
    const size_t num_keys = key_columns.size();
    key_sizes.resize(num_keys);

    bool every_key_fixed = true;
    size_t total_fixed_bytes = 0;

    size_t idx = 0;
    while (every_key_fixed && idx < num_keys)
    {
        if (key_columns[idx]->isFixed())
        {
            key_sizes[idx] = key_columns[idx]->sizeOfField();
            total_fixed_bytes += key_sizes[idx];
            ++idx;
        }
        else
            every_key_fixed = false;
    }

    const bool single_key = (num_keys == 1);

    /// A single numeric key that fits in 64 bits: choose by its width.
    if (single_key && key_columns[0]->isNumeric())
    {
        switch (key_columns[0]->sizeOfField())
        {
            case 1: return SetVariants::Type::key8;
            case 2: return SetVariants::Type::key16;
            case 4: return SetVariants::Type::key32;
            case 8: return SetVariants::Type::key64;
            default:
                throw Exception(" Logical error: numeric column has sizeOfField not in 1, 2, 4, 8. ", ErrorCodes::LOGICAL_ERROR);
        }
    }

    /// If all keys together fit in N bits, use a hash table over keys packed into N bits.
    if (every_key_fixed)
    {
        if (total_fixed_bytes <= 16)
            return SetVariants::Type::keys128;
        if (total_fixed_bytes <= 32)
            return SetVariants::Type::keys256;
    }

    /// A single string key gets a hash table keyed by that string.
    if (single_key)
    {
        if (typeid_cast<const ColumnString *>(key_columns[0]) || typeid_cast<const ColumnConstString *>(key_columns[0]))
            return SetVariants::Type::key_string;

        if (typeid_cast<const ColumnFixedString *>(key_columns[0]))
            return SetVariants::Type::key_fixed_string;
    }

    /// Otherwise fall back to the generic variant (original note: keyed by the concatenation of the keys).
    return SetVariants::Type::hashed;
}
template < typename Method >
2015-03-02 05:41:21 +00:00
void NO_INLINE Set : : insertFromBlockImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
size_t rows ,
SetVariants & variants )
{
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
/// Для всех строчек
2015-03-02 05:41:21 +00:00
for ( size_t i = 0 ; i < rows ; + + i )
2015-03-02 01:11:37 +00:00
{
/// Строим ключ
2015-03-03 20:00:39 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , i , key_sizes ) ;
2015-03-02 01:11:37 +00:00
typename Method : : Data : : iterator it = method . data . find ( key ) ;
bool inserted ;
method . data . emplace ( key , it , inserted ) ;
if ( inserted )
2015-03-03 20:00:39 +00:00
method . onNewKey ( * it , keys_size , i , variants . string_pool ) ;
2015-03-02 01:11:37 +00:00
}
2012-08-23 20:22:44 +00:00
}
2015-01-27 00:52:03 +00:00
bool Set : : insertFromBlock ( const Block & block , bool create_ordered_set )
2012-08-23 20:22:44 +00:00
{
2015-01-27 00:52:03 +00:00
Poco : : ScopedWriteRWLock lock ( rwlock ) ;
2014-03-04 11:26:55 +00:00
size_t keys_size = block . columns ( ) ;
ConstColumnPlainPtrs key_columns ( keys_size ) ;
data_types . resize ( keys_size ) ;
2015-03-15 07:56:46 +00:00
/// Константные столбцы справа от IN поддерживается не напрямую. Для этого, они сначала материализуется.
Columns materialized_columns ;
2014-03-04 11:26:55 +00:00
/// Запоминаем столбцы, с которыми будем работать
for ( size_t i = 0 ; i < keys_size ; + + i )
2014-03-04 11:26:55 +00:00
{
2014-03-04 11:26:55 +00:00
key_columns [ i ] = block . getByPosition ( i ) . column ;
data_types [ i ] = block . getByPosition ( i ) . type ;
2015-03-15 07:56:46 +00:00
if ( key_columns [ i ] - > isConst ( ) )
{
materialized_columns . emplace_back ( static_cast < const IColumnConst * > ( key_columns [ i ] ) - > convertToFullColumn ( ) ) ;
key_columns [ i ] = materialized_columns . back ( ) . get ( ) ;
}
2014-03-04 11:26:55 +00:00
}
2012-08-23 20:22:44 +00:00
2014-03-04 11:26:55 +00:00
size_t rows = block . rows ( ) ;
2014-03-04 14:19:32 +00:00
2014-03-04 11:26:55 +00:00
/// Какую структуру данных для множества использовать?
2014-05-01 15:02:36 +00:00
if ( empty ( ) )
2015-03-02 01:11:37 +00:00
data . init ( data . chooseMethod ( key_columns , key_sizes ) ) ;
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 20:00:39 +00:00
insertFromBlockImpl ( * data . NAME , key_columns , rows , data ) ;
2015-03-02 05:41:21 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
else
2015-03-02 01:11:37 +00:00
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2012-08-23 23:49:28 +00:00
2015-03-02 01:11:37 +00:00
if ( create_ordered_set )
2014-03-04 11:26:55 +00:00
for ( size_t i = 0 ; i < rows ; + + i )
2015-03-02 05:41:21 +00:00
ordered_set_elements - > push_back ( ( * key_columns [ 0 ] ) [ i ] ) ; /// ordered_set для индекса работает только если IN одному ключу.
2014-03-04 14:19:32 +00:00
2014-03-04 11:26:55 +00:00
if ( ! checkSetSizeLimits ( ) )
2012-08-23 23:49:28 +00:00
{
2014-03-04 11:26:55 +00:00
if ( overflow_mode = = OverflowMode : : THROW )
2015-03-02 01:11:37 +00:00
throw Exception ( " IN-set size exceeded. "
" Rows: " + toString ( data . getTotalRowCount ( ) ) +
2014-03-04 11:26:55 +00:00
" , limit: " + toString ( max_rows ) +
2015-03-02 01:11:37 +00:00
" . Bytes: " + toString ( data . getTotalByteCount ( ) ) +
2014-03-04 11:26:55 +00:00
" , limit: " + toString ( max_bytes ) + " . " ,
ErrorCodes : : SET_SIZE_LIMIT_EXCEEDED ) ;
if ( overflow_mode = = OverflowMode : : BREAK )
return false ;
throw Exception ( " Logical error: unknown overflow mode " , ErrorCodes : : LOGICAL_ERROR ) ;
2012-08-23 23:49:28 +00:00
}
2014-03-04 11:26:55 +00:00
return true ;
2012-08-23 20:22:44 +00:00
}
2014-10-06 20:33:12 +00:00
/** Makes expressions such as 1.0 IN (1) work correctly.
  * Checks that the value on the right of IN is compatible with the type on the left,
  * checks that it fits into the range of that type, and converts it where needed.
  * (The original author notes that the code is slightly silly.)
  *
  * Returns the possibly converted value; throws Exception on a type mismatch
  * or an out-of-range value.
  */
static Field convertToType(const Field & src, const IDataType & type)
{
    const auto src_kind = src.getType();

    /// Builds the generic "left type vs. right type" mismatch error.
    auto typeMismatch = [&]
    {
        return Exception(" Type mismatch in IN section: " + type.getName() + " at left, "
            + Field::Types::toString(src_kind) + " at right ");
    };

    /// Builds the error for a value that does not fit into the left-hand type.
    auto outOfRange = [&](const std::string & value_text)
    {
        return Exception(" Value ( " + value_text + " ) in IN section is out of range of type " + type.getName() + " at left ");
    };

    if (!type.isNumeric())
    {
        /// A non-numeric left side accepts no numbers or NULL, strings only for string types, arrays only for arrays.
        const bool string_for_non_string = src_kind == Field::Types::String
            && !typeid_cast<const DataTypeString *>(&type)
            && !typeid_cast<const DataTypeFixedString *>(&type);

        const bool array_for_non_array = src_kind == Field::Types::Array
            && !typeid_cast<const DataTypeArray *>(&type);

        if (src_kind == Field::Types::UInt64
            || src_kind == Field::Types::Int64
            || src_kind == Field::Types::Float64
            || src_kind == Field::Types::Null
            || string_for_non_string
            || array_for_non_array)
            throw typeMismatch();

        return src;
    }

    const bool is_uint8 = typeid_cast<const DataTypeUInt8 *>(&type) != nullptr;
    const bool is_uint16 = typeid_cast<const DataTypeUInt16 *>(&type) != nullptr;
    const bool is_uint32 = typeid_cast<const DataTypeUInt32 *>(&type) != nullptr;
    const bool is_uint64 = typeid_cast<const DataTypeUInt64 *>(&type) != nullptr;
    const bool is_int8 = typeid_cast<const DataTypeInt8 *>(&type) != nullptr;
    const bool is_int16 = typeid_cast<const DataTypeInt16 *>(&type) != nullptr;
    const bool is_int32 = typeid_cast<const DataTypeInt32 *>(&type) != nullptr;
    const bool is_int64 = typeid_cast<const DataTypeInt64 *>(&type) != nullptr;
    const bool is_float32 = typeid_cast<const DataTypeFloat32 *>(&type) != nullptr;
    const bool is_float64 = typeid_cast<const DataTypeFloat64 *>(&type) != nullptr;
    const bool is_date = typeid_cast<const DataTypeDate *>(&type) != nullptr;
    const bool is_datetime = typeid_cast<const DataTypeDateTime *>(&type) != nullptr;

    if (is_uint8 || is_uint16 || is_uint32 || is_uint64)
    {
        if (src_kind == Field::Types::Int64)
            throw Exception(" Type mismatch in IN section: " + type.getName() + " at left, signed at right ");

        if (src_kind == Field::Types::Float64)
            throw Exception(" Type mismatch in IN section: " + type.getName() + " at left, floating point at right ");

        if (src_kind != Field::Types::UInt64)
            throw typeMismatch();

        const UInt64 value = src.get<const UInt64 &>();

        const bool too_large =
            (is_uint8 && value > std::numeric_limits<uint8_t>::max())
            || (is_uint16 && value > std::numeric_limits<uint16_t>::max())
            || (is_uint32 && value > std::numeric_limits<uint32_t>::max());

        if (too_large)
            throw outOfRange(toString(value));

        return src;
    }
    else if (is_int8 || is_int16 || is_int32 || is_int64)
    {
        if (src_kind == Field::Types::Float64)
            throw Exception(" Type mismatch in IN section: " + type.getName() + " at left, floating point at right ");

        if (src_kind == Field::Types::UInt64)
        {
            /// An unsigned literal is accepted if it does not exceed the signed maximum; it is stored as Int64.
            const UInt64 value = src.get<const UInt64 &>();

            const bool too_large =
                (is_int8 && value > uint8_t(std::numeric_limits<int8_t>::max()))
                || (is_int16 && value > uint16_t(std::numeric_limits<int16_t>::max()))
                || (is_int32 && value > uint32_t(std::numeric_limits<int32_t>::max()))
                || (is_int64 && value > uint64_t(std::numeric_limits<int64_t>::max()));

            if (too_large)
                throw outOfRange(toString(value));

            return Field(Int64(value));
        }

        if (src_kind == Field::Types::Int64)
        {
            const Int64 value = src.get<const Int64 &>();

            const bool outside =
                (is_int8 && (value < std::numeric_limits<int8_t>::min() || value > std::numeric_limits<int8_t>::max()))
                || (is_int16 && (value < std::numeric_limits<int16_t>::min() || value > std::numeric_limits<int16_t>::max()))
                || (is_int32 && (value < std::numeric_limits<int32_t>::min() || value > std::numeric_limits<int32_t>::max()));

            if (outside)
                throw outOfRange(toString(value));

            return src;
        }

        throw typeMismatch();
    }
    else if (is_float32 || is_float64)
    {
        if (src_kind == Field::Types::UInt64)
            return Field(Float64(src.get<UInt64>()));

        if (src_kind == Field::Types::Int64)
            return Field(Float64(src.get<Int64>()));

        if (src_kind == Field::Types::Float64)
            return src;

        throw typeMismatch();
    }
    else if (is_date || is_datetime)
    {
        if (src_kind == Field::Types::UInt64)
            return src;

        if (src_kind != Field::Types::String)
            throw typeMismatch();

        /// Dates and date-times may be compared with a string: parse the whole string.
        const String & str = src.get<const String &>();
        ReadBufferFromString in(str);

        if (is_date)
        {
            DayNum_t date{};
            readDateText(date, in);
            if (!in.eof())
                throw Exception(" String is too long for Date: " + str);

            return Field(UInt64(date));
        }

        time_t date_time{};
        readDateTimeText(date_time, in);
        if (!in.eof())
            throw Exception(" String is too long for DateTime: " + str);

        return Field(UInt64(date_time));
    }

    /// A numeric type that matched none of the groups above: the value is passed through unchanged.
    return src;
}
2015-06-12 05:18:47 +00:00
/** Evaluate a constant expression (used for an element of the set in IN).
  * Quite suboptimal, per the original author.
  *
  * Returns the value of row 0 of the resulting constant column.
  * Throws BAD_ARGUMENTS if the expression did not produce a constant column.
  */
static Field evaluateConstantExpression(ASTPtr & node, const Context & context)
{
    ExpressionActionsPtr const_actions = ExpressionAnalyzer(
        node, context, nullptr, NamesAndTypesList{{" _dummy ", new DataTypeUInt8}}).getConstActions();

    /// The block must contain at least one column so that its number of rows is known.
    Block folded{{new ColumnConstUInt8(1, 0), new DataTypeUInt8, " _dummy "}};

    const_actions->execute(folded);

    if (!folded || folded.rows() == 0)
        throw Exception(" Logical error: empty block after evaluation constant expression for IN ", ErrorCodes::LOGICAL_ERROR);

    const String column_name = node->getColumnName();

    const bool is_constant = folded.has(column_name) && folded.getByName(column_name).column->isConst();
    if (!is_constant)
        throw Exception(" Element of set in IN is not a constant expression: " + column_name, ErrorCodes::BAD_ARGUMENTS);

    return (*folded.getByName(column_name).column)[0];
}
/** Turn one AST element of the IN list into a Field converted to `type`.
  * A literal is converted directly; a function node is first evaluated as a
  * constant expression. Any other node kind throws INCORRECT_ELEMENT_OF_SET.
  */
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
{
    if (ASTLiteral * literal = typeid_cast<ASTLiteral *>(node.get()))
        return convertToType(literal->value, type);

    if (!typeid_cast<ASTFunction *>(node.get()))
        throw Exception(" Incorrect element of set. Must be literal or constant expression. ", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

    return convertToType(evaluateConstantExpression(node, context), type);
}
2014-10-06 20:33:12 +00:00
2015-06-12 05:18:47 +00:00
void Set : : createFromAST ( DataTypes & types , ASTPtr node , const Context & context , bool create_ordered_set )
{
2012-08-24 19:42:03 +00:00
data_types = types ;
/// Засунем множество в блок.
Block block ;
for ( size_t i = 0 , size = data_types . size ( ) ; i < size ; + + i )
{
ColumnWithNameAndType col ;
col . type = data_types [ i ] ;
col . column = data_types [ i ] - > createColumn ( ) ;
2013-06-21 20:34:19 +00:00
col . name = " _ " + toString ( i ) ;
2012-08-24 19:42:03 +00:00
block . insert ( col ) ;
}
2014-06-26 00:58:14 +00:00
ASTExpressionList & list = typeid_cast < ASTExpressionList & > ( * node ) ;
2012-08-24 19:42:03 +00:00
for ( ASTs : : iterator it = list . children . begin ( ) ; it ! = list . children . end ( ) ; + + it )
{
if ( data_types . size ( ) = = 1 )
{
2015-06-12 05:18:47 +00:00
block . getByPosition ( 0 ) . column - > insert ( extractValueFromNode ( * it , * data_types [ 0 ] , context ) ) ;
2012-08-24 19:42:03 +00:00
}
2014-06-26 00:58:14 +00:00
else if ( ASTFunction * func = typeid_cast < ASTFunction * > ( & * * it ) )
2012-08-24 19:42:03 +00:00
{
if ( func - > name ! = " tuple " )
throw Exception ( " Incorrect element of set. Must be tuple. " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
size_t tuple_size = func - > arguments - > children . size ( ) ;
if ( tuple_size ! = data_types . size ( ) )
throw Exception ( " Incorrect size of tuple in set. " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
2014-06-26 00:58:14 +00:00
2012-08-24 19:42:03 +00:00
for ( size_t j = 0 ; j < tuple_size ; + + j )
{
2015-06-12 05:18:47 +00:00
block . getByPosition ( j ) . column - > insert ( extractValueFromNode ( func - > arguments - > children [ j ] , * data_types [ j ] , context ) ) ;
2012-08-24 19:42:03 +00:00
}
}
else
throw Exception ( " Incorrect element of set " , ErrorCodes : : INCORRECT_ELEMENT_OF_SET ) ;
}
2014-04-01 10:09:22 +00:00
if ( create_ordered_set )
2014-04-08 12:54:32 +00:00
ordered_set_elements = OrderedSetElementsPtr ( new OrderedSetElements ( ) ) ;
2014-04-01 10:09:22 +00:00
insertFromBlock ( block , create_ordered_set ) ;
if ( create_ordered_set )
2014-04-08 12:54:32 +00:00
std : : sort ( ordered_set_elements - > begin ( ) , ordered_set_elements - > end ( ) ) ;
2012-08-24 19:42:03 +00:00
}
2012-08-23 20:22:44 +00:00
void Set : : execute ( Block & block , const ColumnNumbers & arguments , size_t result , bool negative ) const
{
ColumnUInt8 * c_res = new ColumnUInt8 ;
block . getByPosition ( result ) . column = c_res ;
ColumnUInt8 : : Container_t & vec_res = c_res - > getData ( ) ;
vec_res . resize ( block . getByPosition ( arguments [ 0 ] ) . column - > size ( ) ) ;
2012-08-23 20:35:05 +00:00
2015-01-27 00:52:03 +00:00
Poco : : ScopedReadRWLock lock ( rwlock ) ;
2012-08-24 20:40:34 +00:00
/// Если множество пусто
if ( data_types . empty ( ) )
2013-02-25 18:17:54 +00:00
{
if ( negative )
memset ( & vec_res [ 0 ] , 1 , vec_res . size ( ) ) ;
2014-02-26 16:37:27 +00:00
else
memset ( & vec_res [ 0 ] , 0 , vec_res . size ( ) ) ;
2012-08-24 20:40:34 +00:00
return ;
2013-02-25 18:17:54 +00:00
}
2014-06-26 00:58:14 +00:00
DataTypeArray * array_type = typeid_cast < DataTypeArray * > ( & * block . getByPosition ( arguments [ 0 ] ) . type ) ;
2013-03-19 12:25:59 +00:00
if ( array_type )
2012-08-23 22:40:51 +00:00
{
2013-03-19 12:25:59 +00:00
if ( data_types . size ( ) ! = 1 | | arguments . size ( ) ! = 1 )
throw Exception ( " Number of columns in section IN doesn't match. " , ErrorCodes : : NUMBER_OF_COLUMNS_DOESNT_MATCH ) ;
if ( array_type - > getNestedType ( ) - > getName ( ) ! = data_types [ 0 ] - > getName ( ) )
2013-03-27 12:17:33 +00:00
throw Exception ( std : : string ( ) + " Types in section IN don't match: " + data_types [ 0 ] - > getName ( ) + " on the right, " + array_type - > getNestedType ( ) - > getName ( ) + " on the left. " , ErrorCodes : : TYPE_MISMATCH ) ;
2014-06-26 00:58:14 +00:00
2013-03-25 13:02:12 +00:00
IColumn * in_column = & * block . getByPosition ( arguments [ 0 ] ) . column ;
2015-03-02 01:11:37 +00:00
/// Константный столбец слева от IN поддерживается не напрямую. Для этого, он сначала материализуется.
ColumnPtr materialized_column ;
if ( in_column - > isConst ( ) )
{
materialized_column = static_cast < const IColumnConst * > ( in_column ) - > convertToFullColumn ( ) ;
in_column = materialized_column . get ( ) ;
}
if ( ColumnArray * col = typeid_cast < ColumnArray * > ( in_column ) )
2013-03-25 13:02:12 +00:00
executeArray ( col , vec_res , negative ) ;
else
2015-03-02 01:11:37 +00:00
throw Exception ( " Unexpected array column type: " + in_column - > getName ( ) , ErrorCodes : : ILLEGAL_COLUMN ) ;
2013-03-19 12:25:59 +00:00
}
else
{
if ( data_types . size ( ) ! = arguments . size ( ) )
throw Exception ( " Number of columns in section IN doesn't match. " , ErrorCodes : : NUMBER_OF_COLUMNS_DOESNT_MATCH ) ;
2014-06-26 00:58:14 +00:00
2013-03-19 12:25:59 +00:00
/// Запоминаем столбцы, с которыми будем работать. Также проверим, что типы данных правильные.
ConstColumnPlainPtrs key_columns ( arguments . size ( ) ) ;
for ( size_t i = 0 ; i < arguments . size ( ) ; + + i )
{
key_columns [ i ] = block . getByPosition ( arguments [ i ] ) . column ;
2014-06-26 00:58:14 +00:00
2013-03-19 12:25:59 +00:00
if ( data_types [ i ] - > getName ( ) ! = block . getByPosition ( arguments [ i ] ) . type - > getName ( ) )
2013-06-21 20:34:19 +00:00
throw Exception ( " Types of column " + toString ( i + 1 ) + " in section IN don't match: " + data_types [ i ] - > getName ( ) + " on the right, " + block . getByPosition ( arguments [ i ] ) . type - > getName ( ) + " on the left. " , ErrorCodes : : TYPE_MISMATCH ) ;
2013-03-19 12:25:59 +00:00
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
/// Константные столбцы слева от IN поддерживается не напрямую. Для этого, они сначала материализуется.
Columns materialized_columns ;
for ( auto & column_ptr : key_columns )
{
if ( column_ptr - > isConst ( ) )
{
materialized_columns . emplace_back ( static_cast < const IColumnConst * > ( column_ptr ) - > convertToFullColumn ( ) ) ;
column_ptr = materialized_columns . back ( ) . get ( ) ;
}
}
2013-03-19 12:25:59 +00:00
executeOrdinary ( key_columns , vec_res , negative ) ;
2012-08-23 22:40:51 +00:00
}
2013-03-19 12:25:59 +00:00
}
2012-08-23 22:40:51 +00:00
2015-03-02 01:11:37 +00:00
template < typename Method >
2015-03-02 01:39:42 +00:00
void NO_INLINE Set : : executeImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
ColumnUInt8 : : Container_t & vec_res ,
bool negative ,
2015-03-03 20:00:39 +00:00
size_t rows ) const
2013-03-19 12:25:59 +00:00
{
2015-03-02 01:11:37 +00:00
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
2012-08-23 20:35:05 +00:00
2015-03-02 01:11:37 +00:00
/// NOTE Н е используется оптимизация для подряд идущих одинаковых значений.
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
/// Для всех строчек
for ( size_t i = 0 ; i < rows ; + + i )
2012-08-23 20:35:05 +00:00
{
2015-03-02 01:11:37 +00:00
/// Строим ключ
2015-03-03 20:00:39 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , i , key_sizes ) ;
2015-03-02 01:11:37 +00:00
vec_res [ i ] = negative ^ ( method . data . end ( ) ! = method . data . find ( key ) ) ;
}
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
template < typename Method >
2015-03-02 01:39:42 +00:00
void NO_INLINE Set : : executeArrayImpl (
2015-03-02 01:11:37 +00:00
Method & method ,
const ConstColumnPlainPtrs & key_columns ,
const ColumnArray : : Offsets_t & offsets ,
ColumnUInt8 : : Container_t & vec_res ,
bool negative ,
2015-03-03 20:00:39 +00:00
size_t rows ) const
2015-03-02 01:11:37 +00:00
{
typename Method : : State state ;
state . init ( key_columns ) ;
size_t keys_size = key_columns . size ( ) ;
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
size_t prev_offset = 0 ;
/// Для всех строчек
for ( size_t i = 0 ; i < rows ; + + i )
{
UInt8 res = 0 ;
/// Для всех элементов
for ( size_t j = prev_offset ; j < offsets [ i ] ; + + j )
2012-08-24 19:42:03 +00:00
{
2015-03-02 01:11:37 +00:00
/// Строим ключ
2015-03-03 21:11:54 +00:00
typename Method : : Key key = state . getKey ( key_columns , keys_size , j , key_sizes ) ;
2015-03-02 01:11:37 +00:00
res | = negative ^ ( method . data . end ( ) ! = method . data . find ( key ) ) ;
if ( res )
break ;
2012-08-24 19:42:03 +00:00
}
2015-03-02 01:11:37 +00:00
vec_res [ i ] = res ;
prev_offset = offsets [ i ] ;
2012-08-23 20:35:05 +00:00
}
2015-03-02 01:11:37 +00:00
}
2014-06-26 00:58:14 +00:00
2015-03-02 01:11:37 +00:00
void Set : : executeOrdinary ( const ConstColumnPlainPtrs & key_columns , ColumnUInt8 : : Container_t & vec_res , bool negative ) const
{
size_t rows = key_columns [ 0 ] - > size ( ) ;
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 20:00:39 +00:00
executeImpl ( * data . NAME , key_columns , vec_res , negative , rows ) ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
2012-08-23 20:35:05 +00:00
else
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
2012-08-23 20:22:44 +00:00
}
2013-03-25 13:02:12 +00:00
void Set : : executeArray ( const ColumnArray * key_column , ColumnUInt8 : : Container_t & vec_res , bool negative ) const
{
size_t rows = key_column - > size ( ) ;
const ColumnArray : : Offsets_t & offsets = key_column - > getOffsets ( ) ;
const IColumn & nested_column = key_column - > getData ( ) ;
2015-03-02 01:11:37 +00:00
if ( false ) { }
2015-03-02 05:41:21 +00:00
# define M(NAME) \
2015-03-02 01:11:37 +00:00
else if ( data . type = = SetVariants : : Type : : NAME ) \
2015-03-03 21:11:54 +00:00
executeArrayImpl ( * data . NAME , ConstColumnPlainPtrs { & nested_column } , offsets , vec_res , negative , rows ) ;
2015-03-02 01:11:37 +00:00
APPLY_FOR_SET_VARIANTS ( M )
# undef M
2013-03-25 13:02:12 +00:00
else
throw Exception ( " Unknown set variant. " , ErrorCodes : : UNKNOWN_SET_DATA_VARIANT ) ;
}
2013-03-19 12:25:59 +00:00
2015-03-27 03:37:46 +00:00
/** Using the sorted `ordered_set_elements`, decide whether `x IN set` can be true
  * and/or can be false for some x inside `range`.
  *
  * Returns BoolMask(can_be_true, can_be_false):
  *  - an empty ordered set gives (false, true);
  *  - a range consisting of a single point gives (true, false) if the point is in the set
  *    and (false, true) otherwise;
  *  - for any other range, can_be_false is true and can_be_true tells whether at least one
  *    set element lies inside the range, honouring open and unbounded ends.
  *
  * Throws if the ordered set has not been created.
  */
BoolMask Set::mayBeTrueInRange(const Range & range) const
{
    if (!ordered_set_elements)
        throw DB::Exception(" Ordered set in not created. ");

    if (ordered_set_elements->empty())
        return BoolMask(false, true);

    const Field & left = range.left;
    const Field & right = range.right;

    bool can_be_true = false;
    bool can_be_false = true;

    /// If the whole range is a single key and it is in the set, the block is selected for IN and not for NOT IN.
    if (range.left_bounded && range.right_bounded && range.right_included && range.left_included && left == right)
    {
        if (std::binary_search(ordered_set_elements->begin(), ordered_set_elements->end(), left))
        {
            can_be_false = false;
            can_be_true = true;
        }
    }
    else
    {
        /// First set element that is not below the left end of the range.
        auto left_it = range.left_bounded
            ? std::lower_bound(ordered_set_elements->begin(), ordered_set_elements->end(), left)
            : ordered_set_elements->begin();

        /// An open left end excludes an element equal to it.
        if (range.left_bounded && !range.left_included && left_it != ordered_set_elements->end() && *left_it == left)
            ++left_it;

        /// If left_it is at the end, the whole range lies to the right of the set and can_be_true stays false.
        if (left_it != ordered_set_elements->end())
        {
            /// One past the last set element that is not above the right end of the range.
            auto right_it = range.right_bounded
                ? std::upper_bound(ordered_set_elements->begin(), ordered_set_elements->end(), right)
                : ordered_set_elements->end();

            /// An open right end excludes an element equal to it. Look at the element just before
            /// right_it without moving the iterator: right_it may be end(), which must not be
            /// dereferenced, and the iterator must stay put when the element is not equal to `right`.
            if (range.right_bounded && !range.right_included && right_it != ordered_set_elements->begin() && *(right_it - 1) == right)
                --right_it;

            /// If right_it is at the beginning, the whole range lies to the left of the set.
            if (right_it != ordered_set_elements->begin())
            {
                --right_it;

                /// The range contains a set element unless the last candidate is below the first one.
                can_be_true = !(*right_it < *left_it);
            }
        }
    }

    return BoolMask(can_be_true, can_be_false);
}
2012-08-23 20:22:44 +00:00
}