2017-03-09 03:34:09 +00:00
# include <string.h> // memcpy
2017-04-01 09:19:00 +00:00
# include <Columns/ColumnArray.h>
# include <Columns/ColumnsNumber.h>
# include <Columns/ColumnString.h>
2017-04-18 03:03:39 +00:00
# include <Columns/ColumnTuple.h>
# include <Columns/ColumnNullable.h>
2017-07-21 06:35:58 +00:00
# include <Columns/ColumnConst.h>
2017-04-01 09:19:00 +00:00
# include <Columns/ColumnsCommon.h>
2018-12-14 17:50:10 +00:00
# include <common/unaligned.h>
2017-07-06 13:54:55 +00:00
# include <DataStreams/ColumnGathererStream.h>
2017-04-01 09:19:00 +00:00
# include <Common/Exception.h>
# include <Common/Arena.h>
# include <Common/SipHash.h>
2017-07-13 20:58:19 +00:00
# include <Common/typeid_cast.h>
2019-08-21 02:28:04 +00:00
# include <Common/assert_cast.h>
2020-03-18 16:03:55 +00:00
# include <Common/WeakHash.h>
2020-03-18 16:46:07 +00:00
# include <Common/HashTable/Hash.h>
2017-03-09 03:34:09 +00:00
2015-07-06 19:24:51 +00:00
namespace DB
{
2017-03-09 03:34:09 +00:00
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int NOT_IMPLEMENTED;
    extern const int BAD_ARGUMENTS;
    extern const int PARAMETER_OUT_OF_BOUND;
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
    extern const int LOGICAL_ERROR;
}
2018-03-20 14:17:09 +00:00
ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column)
    : data(std::move(nested_column)), offsets(std::move(offsets_column))
{
    /// Offsets are cumulative element counts and must be stored as UInt64.
    if (!typeid_cast<const ColumnOffsets *>(offsets.get()))
        throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::ILLEGAL_COLUMN);

    /** NOTE
      * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate).
      * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution.
      */
}
2018-03-20 14:17:09 +00:00
ColumnArray::ColumnArray(MutableColumnPtr && nested_column)
    : data(std::move(nested_column))
{
    /// Without explicit offsets, only an empty nested column is meaningful.
    if (!data->empty())
        throw Exception("Not empty data passed to ColumnArray, but no offsets passed", ErrorCodes::ILLEGAL_COLUMN);

    offsets = ColumnOffsets::create();
}
2017-03-09 03:34:09 +00:00
2017-12-07 22:11:51 +00:00
std : : string ColumnArray : : getName ( ) const { return " Array( " + getData ( ) . getName ( ) + " ) " ; }
2017-03-09 03:34:09 +00:00
2017-12-14 01:43:19 +00:00
MutableColumnPtr ColumnArray::cloneResized(size_t to_size) const
{
    auto res = ColumnArray::create(getData().cloneEmpty());

    if (to_size == 0)
        return res;

    size_t from_size = size();

    if (to_size <= from_size)
    {
        /// Shrinking: keep a prefix of the offsets and the corresponding nested data.
        res->getOffsets().assign(getOffsets().begin(), getOffsets().begin() + to_size);
        res->getData().insertRangeFrom(getData(), 0, getOffsets()[to_size - 1]);
    }
    else
    {
        /// Growing: copy everything, then pad with empty arrays (repeated last offset).
        Offset offset = 0;
        if (from_size > 0)
        {
            res->getOffsets().assign(getOffsets().begin(), getOffsets().end());
            res->getData().insertRangeFrom(getData(), 0, getData().size());
            offset = getOffsets().back();
        }

        res->getOffsets().resize(to_size);
        for (size_t i = from_size; i < to_size; ++i)
            res->getOffsets()[i] = offset;
    }

    return res;
}
size_t ColumnArray::size() const
{
    /// One offset per array row.
    return getOffsets().size();
}
Field ColumnArray::operator[](size_t n) const
{
    /// Materialize row n as a Field holding an Array of nested Fields.
    size_t offset = offsetAt(n);
    size_t size = sizeAt(n);
    Array res(size);

    for (size_t i = 0; i < size; ++i)
        res[i] = getData()[offset + i];

    return res;
}
void ColumnArray : : get ( size_t n , Field & res ) const
{
2017-04-01 07:20:54 +00:00
size_t offset = offsetAt ( n ) ;
size_t size = sizeAt ( n ) ;
res = Array ( size ) ;
Array & res_arr = DB : : get < Array & > ( res ) ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < size ; + + i )
getData ( ) . get ( offset + i , res_arr [ i ] ) ;
2017-03-09 03:34:09 +00:00
}
StringRef ColumnArray::getDataAt(size_t n) const
{
    /** Returns the range of memory that covers all elements of the array.
      * Works for arrays of fixed length values.
      * For arrays of strings and arrays of arrays, the resulting chunk of memory may not be one-to-one correspondence with the elements,
      * since it contains only the data laid in succession, but not the offsets.
      */

    size_t offset_of_first_elem = offsetAt(n);
    StringRef first = getData().getDataAtWithTerminatingZero(offset_of_first_elem);

    size_t array_size = sizeAt(n);
    if (array_size == 0)
        return StringRef(first.data, 0);

    size_t offset_of_last_elem = getOffsets()[n] - 1;
    StringRef last = getData().getDataAtWithTerminatingZero(offset_of_last_elem);

    /// Span from the start of the first element to the end of the last one.
    return StringRef(first.data, last.data + last.size - first.data);
}
void ColumnArray : : insertData ( const char * pos , size_t length )
{
2017-04-01 07:20:54 +00:00
/** Similarly - only for arrays of fixed length values.
*/
2020-03-23 02:12:31 +00:00
if ( ! data - > isFixedAndContiguous ( ) )
2017-04-01 07:20:54 +00:00
throw Exception ( " Method insertData is not supported for " + getName ( ) , ErrorCodes : : NOT_IMPLEMENTED ) ;
2017-03-09 03:34:09 +00:00
2020-03-23 02:12:31 +00:00
size_t field_size = data - > sizeOfValueIfFixed ( ) ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
const char * end = pos + length ;
size_t elems = 0 ;
for ( ; pos + field_size < = end ; pos + = field_size , + + elems )
2020-03-23 02:12:31 +00:00
data - > insertData ( pos , field_size ) ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
if ( pos ! = end )
throw Exception ( " Incorrect length argument for method ColumnArray::insertData " , ErrorCodes : : BAD_ARGUMENTS ) ;
2017-03-09 03:34:09 +00:00
2018-12-24 14:26:38 +00:00
getOffsets ( ) . push_back ( getOffsets ( ) . back ( ) + elems ) ;
2017-03-09 03:34:09 +00:00
}
StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    size_t array_size = sizeAt(n);
    size_t offset = offsetAt(n);

    /// Serialization format: array size followed by each element, laid out contiguously in the arena.
    char * pos = arena.allocContinue(sizeof(array_size), begin);
    memcpy(pos, &array_size, sizeof(array_size));

    StringRef res(pos, sizeof(array_size));

    for (size_t i = 0; i < array_size; ++i)
    {
        auto value_ref = getData().serializeValueIntoArena(offset + i, arena, begin);
        /// allocContinue may have moved the whole chunk; re-anchor res relative to the last write.
        res.data = value_ref.data - res.size;
        res.size += value_ref.size;
    }

    return res;
}
const char * ColumnArray : : deserializeAndInsertFromArena ( const char * pos )
{
2018-12-14 17:50:10 +00:00
size_t array_size = unalignedLoad < size_t > ( pos ) ;
2017-04-01 07:20:54 +00:00
pos + = sizeof ( array_size ) ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < array_size ; + + i )
pos = getData ( ) . deserializeAndInsertFromArena ( pos ) ;
2017-03-09 03:34:09 +00:00
2018-12-24 14:26:38 +00:00
getOffsets ( ) . push_back ( getOffsets ( ) . back ( ) + array_size ) ;
2017-04-01 07:20:54 +00:00
return pos ;
2017-03-09 03:34:09 +00:00
}
void ColumnArray : : updateHashWithValue ( size_t n , SipHash & hash ) const
{
2017-04-01 07:20:54 +00:00
size_t array_size = sizeAt ( n ) ;
size_t offset = offsetAt ( n ) ;
2017-03-09 03:34:09 +00:00
2018-03-03 15:36:20 +00:00
hash . update ( array_size ) ;
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < array_size ; + + i )
getData ( ) . updateHashWithValue ( offset + i , hash ) ;
2017-03-09 03:34:09 +00:00
}
2020-03-18 16:03:55 +00:00
void ColumnArray : : updateWeakHash32 ( WeakHash32 & hash ) const
{
auto s = offsets - > size ( ) ;
if ( hash . getData ( ) . size ( ) ! = s )
throw Exception ( " Size of WeakHash32 does not match size of column: column size is " + std : : to_string ( s ) +
" , hash size is " + std : : to_string ( hash . getData ( ) . size ( ) ) , ErrorCodes : : LOGICAL_ERROR ) ;
WeakHash32 internal_hash ( data - > size ( ) ) ;
data - > updateWeakHash32 ( internal_hash ) ;
Offset prev_offset = 0 ;
auto & offsets_data = getOffsets ( ) ;
auto & hash_data = hash . getData ( ) ;
auto & internal_hash_data = internal_hash . getData ( ) ;
for ( size_t i = 0 ; i < s ; + + i )
{
2020-03-24 11:38:41 +00:00
/// This row improves hash a little bit according to integration tests.
/// It is the same as to use previous hash value as the first element of array.
2020-03-24 11:29:55 +00:00
hash_data [ i ] = intHashCRC32 ( hash_data [ i ] ) ;
2020-03-18 16:03:55 +00:00
for ( size_t row = prev_offset ; row < offsets_data [ i ] ; + + row )
/// It is probably not the best way to combine hashes.
/// But much better then xor which lead to similar hash for arrays like [1], [1, 1, 1], [1, 1, 1, 1, 1], ...
/// Much better implementation - to add offsets as an optional argument to updateWeakHash32.
hash_data [ i ] = intHashCRC32 ( internal_hash_data [ row ] , hash_data [ i ] ) ;
2020-03-20 17:31:05 +00:00
prev_offset = offsets_data [ i ] ;
2020-03-18 16:03:55 +00:00
}
}
2017-03-09 03:34:09 +00:00
void ColumnArray : : insert ( const Field & x )
{
2017-04-01 07:20:54 +00:00
const Array & array = DB : : get < const Array & > ( x ) ;
size_t size = array . size ( ) ;
for ( size_t i = 0 ; i < size ; + + i )
getData ( ) . insert ( array [ i ] ) ;
2018-12-24 14:26:38 +00:00
getOffsets ( ) . push_back ( getOffsets ( ) . back ( ) + size ) ;
2017-03-09 03:34:09 +00:00
}
void ColumnArray : : insertFrom ( const IColumn & src_ , size_t n )
{
2019-08-21 02:28:04 +00:00
const ColumnArray & src = assert_cast < const ColumnArray & > ( src_ ) ;
2017-04-01 07:20:54 +00:00
size_t size = src . sizeAt ( n ) ;
size_t offset = src . offsetAt ( n ) ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
getData ( ) . insertRangeFrom ( src . getData ( ) , offset , size ) ;
2018-12-24 14:26:38 +00:00
getOffsets ( ) . push_back ( getOffsets ( ) . back ( ) + size ) ;
2017-03-09 03:34:09 +00:00
}
void ColumnArray : : insertDefault ( )
{
2019-01-08 14:56:07 +00:00
/// NOTE 1: We can use back() even if the array is empty (due to zero -1th element in PODArray).
/// NOTE 2: We cannot use reference in push_back, because reference get invalidated if array is reallocated.
auto last_offset = getOffsets ( ) . back ( ) ;
getOffsets ( ) . push_back ( last_offset ) ;
2017-03-09 03:34:09 +00:00
}
void ColumnArray : : popBack ( size_t n )
{
2018-08-27 18:16:32 +00:00
auto & offsets_data = getOffsets ( ) ;
size_t nested_n = offsets_data . back ( ) - offsetAt ( offsets_data . size ( ) - n ) ;
2017-04-01 07:20:54 +00:00
if ( nested_n )
getData ( ) . popBack ( nested_n ) ;
2018-08-27 18:16:32 +00:00
offsets_data . resize_assume_reserved ( offsets_data . size ( ) - n ) ;
2017-03-09 03:34:09 +00:00
}
int ColumnArray : : compareAt ( size_t n , size_t m , const IColumn & rhs_ , int nan_direction_hint ) const
{
2019-08-21 02:28:04 +00:00
const ColumnArray & rhs = assert_cast < const ColumnArray & > ( rhs_ ) ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
/// Suboptimal
size_t lhs_size = sizeAt ( n ) ;
size_t rhs_size = rhs . sizeAt ( m ) ;
size_t min_size = std : : min ( lhs_size , rhs_size ) ;
for ( size_t i = 0 ; i < min_size ; + + i )
if ( int res = getData ( ) . compareAt ( offsetAt ( n ) + i , rhs . offsetAt ( m ) + i , * rhs . data . get ( ) , nan_direction_hint ) )
return res ;
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
return lhs_size < rhs_size
? - 1
: ( lhs_size = = rhs_size
? 0
: 1 ) ;
2017-03-09 03:34:09 +00:00
}
namespace
{
2017-04-01 07:20:54 +00:00
template < bool positive >
2020-03-23 02:12:31 +00:00
struct Less
2017-04-01 07:20:54 +00:00
{
const ColumnArray & parent ;
int nan_direction_hint ;
2017-03-09 03:34:09 +00:00
2020-03-23 02:12:31 +00:00
Less ( const ColumnArray & parent_ , int nan_direction_hint_ )
2017-04-01 07:20:54 +00:00
: parent ( parent_ ) , nan_direction_hint ( nan_direction_hint_ ) { }
2017-03-09 03:34:09 +00:00
2017-04-01 07:20:54 +00:00
bool operator ( ) ( size_t lhs , size_t rhs ) const
{
if ( positive )
return parent . compareAt ( lhs , rhs , parent , nan_direction_hint ) < 0 ;
else
return parent . compareAt ( lhs , rhs , parent , nan_direction_hint ) > 0 ;
}
} ;
2017-03-09 03:34:09 +00:00
}
void ColumnArray : : reserve ( size_t n )
{
2017-04-01 07:20:54 +00:00
getOffsets ( ) . reserve ( n ) ;
2020-03-23 02:12:31 +00:00
getData ( ) . reserve ( n ) ; /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
2017-03-09 03:34:09 +00:00
}
size_t ColumnArray::byteSize() const
{
    /// Nested data plus the offsets array.
    return getData().byteSize() + getOffsets().size() * sizeof(getOffsets()[0]);
}
2017-07-13 16:49:09 +00:00
size_t ColumnArray::allocatedBytes() const
{
    return getData().allocatedBytes() + getOffsets().allocated_bytes();
}
2019-03-10 03:16:51 +00:00
void ColumnArray : : protect ( )
{
getData ( ) . protect ( ) ;
getOffsets ( ) . protect ( ) ;
}
2017-03-09 03:34:09 +00:00
bool ColumnArray : : hasEqualOffsets ( const ColumnArray & other ) const
{
2017-04-01 07:20:54 +00:00
if ( offsets = = other . offsets )
return true ;
2017-03-09 03:34:09 +00:00
2017-12-15 21:32:25 +00:00
const Offsets & offsets1 = getOffsets ( ) ;
const Offsets & offsets2 = other . getOffsets ( ) ;
2018-09-02 03:40:15 +00:00
return offsets1 . size ( ) = = offsets2 . size ( )
2020-03-09 01:03:43 +00:00
& & ( offsets1 . empty ( ) | | 0 = = memcmp ( offsets1 . data ( ) , offsets2 . data ( ) , sizeof ( offsets1 [ 0 ] ) * offsets1 . size ( ) ) ) ;
2017-03-09 03:34:09 +00:00
}
2018-12-23 01:46:30 +00:00
ColumnPtr ColumnArray::convertToFullColumnIfConst() const
{
    /// It is possible to have an array with constant data and non-constant offsets.
    /// Example is the result of expression: replicate('hello', [1])
    return ColumnArray::create(data->convertToFullColumnIfConst(), offsets);
}
2017-03-09 03:34:09 +00:00
void ColumnArray : : getExtremes ( Field & min , Field & max ) const
{
2017-04-01 07:20:54 +00:00
min = Array ( ) ;
max = Array ( ) ;
2017-09-07 13:22:25 +00:00
size_t col_size = size ( ) ;
if ( col_size = = 0 )
return ;
size_t min_idx = 0 ;
size_t max_idx = 0 ;
for ( size_t i = 1 ; i < col_size ; + + i )
{
if ( compareAt ( i , min_idx , * this , /* nan_direction_hint = */ 1 ) < 0 )
min_idx = i ;
2017-09-14 11:52:22 +00:00
else if ( compareAt ( i , max_idx , * this , /* nan_direction_hint = */ - 1 ) > 0 )
2017-09-07 13:22:25 +00:00
max_idx = i ;
}
get ( min_idx , min ) ;
get ( max_idx , max ) ;
2017-03-09 03:34:09 +00:00
}
2015-07-06 19:24:51 +00:00
2015-11-29 17:06:30 +00:00
void ColumnArray : : insertRangeFrom ( const IColumn & src , size_t start , size_t length )
2015-07-06 19:24:51 +00:00
{
2017-04-01 07:20:54 +00:00
if ( length = = 0 )
return ;
2015-07-06 19:24:51 +00:00
2019-08-21 02:28:04 +00:00
const ColumnArray & src_concrete = assert_cast < const ColumnArray & > ( src ) ;
2015-11-29 17:06:30 +00:00
2017-04-01 07:20:54 +00:00
if ( start + length > src_concrete . getOffsets ( ) . size ( ) )
2019-02-27 22:18:58 +00:00
throw Exception ( " Parameter out of bound in ColumnArray::insertRangeFrom method. [start( " + std : : to_string ( start ) + " ) + length( " + std : : to_string ( length ) + " ) > offsets.size( " + std : : to_string ( src_concrete . getOffsets ( ) . size ( ) ) + " )] " ,
2017-04-01 07:20:54 +00:00
ErrorCodes : : PARAMETER_OUT_OF_BOUND ) ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
size_t nested_offset = src_concrete . offsetAt ( start ) ;
size_t nested_length = src_concrete . getOffsets ( ) [ start + length - 1 ] - nested_offset ;
2015-07-06 19:24:51 +00:00
2017-12-17 05:21:04 +00:00
getData ( ) . insertRangeFrom ( src_concrete . getData ( ) , nested_offset , nested_length ) ;
2015-07-06 19:24:51 +00:00
2017-12-15 21:32:25 +00:00
Offsets & cur_offsets = getOffsets ( ) ;
const Offsets & src_offsets = src_concrete . getOffsets ( ) ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
if ( start = = 0 & & cur_offsets . empty ( ) )
{
cur_offsets . assign ( src_offsets . begin ( ) , src_offsets . begin ( ) + length ) ;
}
else
{
size_t old_size = cur_offsets . size ( ) ;
size_t prev_max_offset = old_size ? cur_offsets . back ( ) : 0 ;
cur_offsets . resize ( old_size + length ) ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < length ; + + i )
cur_offsets [ old_size + i ] = src_offsets [ start + i ] - nested_offset + prev_max_offset ;
}
2015-07-06 19:24:51 +00:00
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
{
    /// Dispatch to a specialized implementation for the concrete nested column type.
    if (typeid_cast<const ColumnUInt8 *>(data.get()))      return filterNumber<UInt8>(filt, result_size_hint);
    if (typeid_cast<const ColumnUInt16 *>(data.get()))     return filterNumber<UInt16>(filt, result_size_hint);
    if (typeid_cast<const ColumnUInt32 *>(data.get()))     return filterNumber<UInt32>(filt, result_size_hint);
    if (typeid_cast<const ColumnUInt64 *>(data.get()))     return filterNumber<UInt64>(filt, result_size_hint);
    if (typeid_cast<const ColumnInt8 *>(data.get()))       return filterNumber<Int8>(filt, result_size_hint);
    if (typeid_cast<const ColumnInt16 *>(data.get()))      return filterNumber<Int16>(filt, result_size_hint);
    if (typeid_cast<const ColumnInt32 *>(data.get()))      return filterNumber<Int32>(filt, result_size_hint);
    if (typeid_cast<const ColumnInt64 *>(data.get()))      return filterNumber<Int64>(filt, result_size_hint);
    if (typeid_cast<const ColumnFloat32 *>(data.get()))    return filterNumber<Float32>(filt, result_size_hint);
    if (typeid_cast<const ColumnFloat64 *>(data.get()))    return filterNumber<Float64>(filt, result_size_hint);
    if (typeid_cast<const ColumnString *>(data.get()))     return filterString(filt, result_size_hint);
    if (typeid_cast<const ColumnTuple *>(data.get()))      return filterTuple(filt, result_size_hint);
    if (typeid_cast<const ColumnNullable *>(data.get()))   return filterNullable(filt, result_size_hint);
    return filterGeneric(filt, result_size_hint);
}
template < typename T >
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray : : filterNumber ( const Filter & filt , ssize_t result_size_hint ) const
2015-11-21 03:19:43 +00:00
{
2020-03-08 21:40:00 +00:00
if ( getOffsets ( ) . empty ( ) )
2017-12-15 19:46:24 +00:00
return ColumnArray : : create ( data ) ;
2015-11-21 03:19:43 +00:00
2017-12-15 19:46:24 +00:00
auto res = ColumnArray : : create ( data - > cloneEmpty ( ) ) ;
2015-11-21 03:19:43 +00:00
2019-08-21 02:28:04 +00:00
auto & res_elems = assert_cast < ColumnVector < T > & > ( res - > getData ( ) ) . getData ( ) ;
2017-12-15 21:32:25 +00:00
Offsets & res_offsets = res - > getOffsets ( ) ;
2015-11-21 03:19:43 +00:00
2019-08-21 02:28:04 +00:00
filterArraysImpl < T > ( assert_cast < const ColumnVector < T > & > ( * data ) . getData ( ) , getOffsets ( ) , res_elems , res_offsets , filt , result_size_hint ) ;
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
return res ;
2015-11-21 03:19:43 +00:00
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hint) const
{
    size_t col_size = getOffsets().size();
    if (col_size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (0 == col_size)
        return ColumnArray::create(data);

    auto res = ColumnArray::create(data->cloneEmpty());

    const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
    const ColumnString::Chars & src_chars = src_string.getChars();
    const Offsets & src_string_offsets = src_string.getOffsets();
    const Offsets & src_offsets = getOffsets();

    ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res->getData()).getChars();
    Offsets & res_string_offsets = typeid_cast<ColumnString &>(res->getData()).getOffsets();
    Offsets & res_offsets = res->getOffsets();

    if (result_size_hint < 0) /// Other cases are not considered.
    {
        res_chars.reserve(src_chars.size());
        res_string_offsets.reserve(src_string_offsets.size());
        res_offsets.reserve(col_size);
    }

    /// Three levels of offsets are tracked: array offsets, string offsets, and character positions.
    Offset prev_src_offset = 0;
    Offset prev_src_string_offset = 0;

    Offset prev_res_offset = 0;
    Offset prev_res_string_offset = 0;

    for (size_t i = 0; i < col_size; ++i)
    {
        /// Number of rows in the array.
        size_t array_size = src_offsets[i] - prev_src_offset;

        if (filt[i])
        {
            /// If the array is not empty - copy content.
            if (array_size)
            {
                size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] - prev_src_string_offset;
                size_t res_chars_prev_size = res_chars.size();
                res_chars.resize(res_chars_prev_size + chars_to_copy);
                memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset], chars_to_copy);

                /// Rebase the copied string offsets onto the result's character positions.
                for (size_t j = 0; j < array_size; ++j)
                    res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] + prev_res_string_offset - prev_src_string_offset);

                prev_res_string_offset = res_string_offsets.back();
            }

            prev_res_offset += array_size;
            res_offsets.push_back(prev_res_offset);
        }

        if (array_size)
        {
            prev_src_offset += array_size;
            prev_src_string_offset = src_string_offsets[prev_src_offset - 1];
        }
    }

    return res;
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hint) const
{
    size_t size = getOffsets().size();
    if (size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (size == 0)
        return ColumnArray::create(data);

    /// Expand the per-array filter into a per-element filter for the nested column.
    Filter nested_filt(getOffsets().back());
    for (size_t i = 0; i < size; ++i)
    {
        if (filt[i])
            memset(&nested_filt[offsetAt(i)], 1, sizeAt(i));
        else
            memset(&nested_filt[offsetAt(i)], 0, sizeAt(i));
    }

    auto res = ColumnArray::create(data->cloneEmpty());

    /// Scale the hint by the average array length, guarding against overflow.
    ssize_t nested_result_size_hint = 0;
    if (result_size_hint < 0)
        nested_result_size_hint = result_size_hint;
    else if (result_size_hint && result_size_hint < 1000000000 && data->size() < 1000000000)  /// Avoid overflow.
        nested_result_size_hint = result_size_hint * data->size() / size;

    res->data = data->filter(nested_filt, nested_result_size_hint);

    Offsets & res_offsets = res->getOffsets();
    if (result_size_hint)
        res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);

    /// Rebuild offsets for the surviving rows.
    size_t current_offset = 0;
    for (size_t i = 0; i < size; ++i)
    {
        if (filt[i])
        {
            current_offset += sizeAt(i);
            res_offsets.push_back(current_offset);
        }
    }

    return res;
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint) const
{
    if (getOffsets().empty())
        return ColumnArray::create(data);

    const ColumnNullable & nullable_elems = assert_cast<const ColumnNullable &>(*data);

    /// Filter the nested (non-null) values as a plain Array(T), then filter the null map separately.
    auto array_of_nested = ColumnArray::create(nullable_elems.getNestedColumnPtr(), offsets);
    auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint);
    auto & filtered_array_of_nested = assert_cast<const ColumnArray &>(*filtered_array_of_nested_owner);
    auto & filtered_offsets = filtered_array_of_nested.getOffsetsPtr();

    auto res_null_map = ColumnUInt8::create();
    filterArraysImplOnlyData(nullable_elems.getNullMapData(), getOffsets(), res_null_map->getData(), filt, result_size_hint);

    /// Reassemble Array(Nullable(T)) from the filtered parts.
    return ColumnArray::create(
        ColumnNullable::create(
            filtered_array_of_nested.getDataPtr(),
            std::move(res_null_map)),
        filtered_offsets);
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint) const
{
    if (getOffsets().empty())
        return ColumnArray::create(data);

    const ColumnTuple & tuple = assert_cast<const ColumnTuple &>(*data);

    /// Make temporary arrays for each components of Tuple, then filter and collect back.

    size_t tuple_size = tuple.tupleSize();

    if (tuple_size == 0)
        throw Exception("Logical error: empty tuple", ErrorCodes::LOGICAL_ERROR);

    Columns temporary_arrays(tuple_size);
    for (size_t i = 0; i < tuple_size; ++i)
        temporary_arrays[i] = ColumnArray(tuple.getColumns()[i]->assumeMutable(), getOffsetsPtr()->assumeMutable())
                .filter(filt, result_size_hint);

    Columns tuple_columns(tuple_size);
    for (size_t i = 0; i < tuple_size; ++i)
        tuple_columns[i] = assert_cast<const ColumnArray &>(*temporary_arrays[i]).getDataPtr();

    /// All components were filtered with identical offsets; reuse the first one's.
    return ColumnArray::create(
        ColumnTuple::create(tuple_columns),
        assert_cast<const ColumnArray &>(*temporary_arrays.front()).getOffsetsPtr());
}
2015-07-06 19:24:51 +00:00
2019-02-18 19:44:26 +00:00
ColumnPtr ColumnArray::permute(const Permutation & perm, size_t limit) const
{
    const size_t num_rows = getOffsets().size();

    /// limit == 0 means "take all rows".
    limit = (limit == 0) ? num_rows : std::min(num_rows, limit);

    if (perm.size() < limit)
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (limit == 0)
        return ColumnArray::create(data);

    /// Translate the row-level permutation into an element-level permutation of the nested column.
    Permutation nested_perm(getOffsets().back());

    auto res = ColumnArray::create(data->cloneEmpty());

    Offsets & res_offsets = res->getOffsets();
    res_offsets.resize(limit);

    size_t current_offset = 0;
    for (size_t i = 0; i < limit; ++i)
    {
        const size_t value_size = sizeAt(perm[i]);
        const size_t value_offset = offsetAt(perm[i]);

        for (size_t j = 0; j < value_size; ++j)
            nested_perm[current_offset + j] = value_offset + j;

        current_offset += value_size;
        res_offsets[i] = current_offset;
    }

    if (current_offset != 0)
        res->data = data->permute(nested_perm, current_offset);

    /// Returned by value: NRVO/move applies, no std::move needed.
    return res;
}
2019-02-18 17:28:53 +00:00
ColumnPtr ColumnArray::index(const IColumn & indexes, size_t limit) const
{
    /// Dispatch on the concrete integer type of `indexes` via the shared helper,
    /// which ends up calling indexImpl<T> with the matching width.
    return selectIndexImpl(*this, indexes, limit);
}
template < typename T >
2019-02-18 17:28:53 +00:00
ColumnPtr ColumnArray : : indexImpl ( const PaddedPODArray < T > & indexes , size_t limit ) const
2018-04-18 21:00:47 +00:00
{
if ( limit = = 0 )
return ColumnArray : : create ( data ) ;
/// Convert indexes to UInt64 in case of overflow.
auto nested_indexes_column = ColumnUInt64 : : create ( ) ;
PaddedPODArray < UInt64 > & nested_indexes = nested_indexes_column - > getData ( ) ;
nested_indexes . reserve ( getOffsets ( ) . back ( ) ) ;
auto res = ColumnArray : : create ( data - > cloneEmpty ( ) ) ;
Offsets & res_offsets = res - > getOffsets ( ) ;
res_offsets . resize ( limit ) ;
size_t current_offset = 0 ;
for ( size_t i = 0 ; i < limit ; + + i )
{
for ( size_t j = 0 ; j < sizeAt ( indexes [ i ] ) ; + + j )
nested_indexes . push_back ( offsetAt ( indexes [ i ] ) + j ) ;
current_offset + = sizeAt ( indexes [ i ] ) ;
res_offsets [ i ] = current_offset ;
}
if ( current_offset ! = 0 )
2018-06-07 18:14:37 +00:00
res - > data = data - > index ( * nested_indexes_column , current_offset ) ;
2018-04-18 21:00:47 +00:00
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
return res ;
2018-04-18 21:00:47 +00:00
}
2018-08-26 00:44:23 +00:00
INSTANTIATE_INDEX_IMPL ( ColumnArray )
2018-04-18 21:00:47 +00:00
2019-02-18 19:44:26 +00:00
void ColumnArray : : getPermutation ( bool reverse , size_t limit , int nan_direction_hint , Permutation & res ) const
2015-07-06 19:24:51 +00:00
{
2017-04-01 07:20:54 +00:00
size_t s = size ( ) ;
if ( limit > = s )
limit = 0 ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
res . resize ( s ) ;
for ( size_t i = 0 ; i < s ; + + i )
res [ i ] = i ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
if ( limit )
{
if ( reverse )
2020-03-23 02:12:31 +00:00
std : : partial_sort ( res . begin ( ) , res . begin ( ) + limit , res . end ( ) , Less < false > ( * this , nan_direction_hint ) ) ;
2017-04-01 07:20:54 +00:00
else
2020-03-23 02:12:31 +00:00
std : : partial_sort ( res . begin ( ) , res . begin ( ) + limit , res . end ( ) , Less < true > ( * this , nan_direction_hint ) ) ;
2017-04-01 07:20:54 +00:00
}
else
{
if ( reverse )
2020-03-23 02:12:31 +00:00
std : : sort ( res . begin ( ) , res . end ( ) , Less < false > ( * this , nan_direction_hint ) ) ;
2017-04-01 07:20:54 +00:00
else
2020-03-23 02:12:31 +00:00
std : : sort ( res . begin ( ) , res . end ( ) , Less < true > ( * this , nan_direction_hint ) ) ;
2017-04-01 07:20:54 +00:00
}
2015-07-06 19:24:51 +00:00
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
    if (replicate_offsets.empty())
        return cloneEmpty();

    /// Dispatch to a specialized implementation based on the concrete nested column type;
    /// fall back to the generic row-by-row version for everything else.
    if (typeid_cast<const ColumnUInt8 *>(data.get()))
        return replicateNumber<UInt8>(replicate_offsets);
    if (typeid_cast<const ColumnUInt16 *>(data.get()))
        return replicateNumber<UInt16>(replicate_offsets);
    if (typeid_cast<const ColumnUInt32 *>(data.get()))
        return replicateNumber<UInt32>(replicate_offsets);
    if (typeid_cast<const ColumnUInt64 *>(data.get()))
        return replicateNumber<UInt64>(replicate_offsets);
    if (typeid_cast<const ColumnInt8 *>(data.get()))
        return replicateNumber<Int8>(replicate_offsets);
    if (typeid_cast<const ColumnInt16 *>(data.get()))
        return replicateNumber<Int16>(replicate_offsets);
    if (typeid_cast<const ColumnInt32 *>(data.get()))
        return replicateNumber<Int32>(replicate_offsets);
    if (typeid_cast<const ColumnInt64 *>(data.get()))
        return replicateNumber<Int64>(replicate_offsets);
    if (typeid_cast<const ColumnFloat32 *>(data.get()))
        return replicateNumber<Float32>(replicate_offsets);
    if (typeid_cast<const ColumnFloat64 *>(data.get()))
        return replicateNumber<Float64>(replicate_offsets);
    if (typeid_cast<const ColumnString *>(data.get()))
        return replicateString(replicate_offsets);
    if (typeid_cast<const ColumnConst *>(data.get()))
        return replicateConst(replicate_offsets);
    if (typeid_cast<const ColumnNullable *>(data.get()))
        return replicateNullable(replicate_offsets);
    if (typeid_cast<const ColumnTuple *>(data.get()))
        return replicateTuple(replicate_offsets);

    return replicateGeneric(replicate_offsets);
}
template < typename T >
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray : : replicateNumber ( const Offsets & replicate_offsets ) const
2015-07-06 19:24:51 +00:00
{
2017-04-01 07:20:54 +00:00
size_t col_size = size ( ) ;
if ( col_size ! = replicate_offsets . size ( ) )
throw Exception ( " Size of offsets doesn't match size of column. " , ErrorCodes : : SIZES_OF_COLUMNS_DOESNT_MATCH ) ;
2015-07-06 19:24:51 +00:00
2017-12-14 01:43:19 +00:00
MutableColumnPtr res = cloneEmpty ( ) ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
if ( 0 = = col_size )
return res ;
2015-07-06 19:24:51 +00:00
2020-03-23 02:12:31 +00:00
ColumnArray & res_arr = typeid_cast < ColumnArray & > ( * res ) ;
2015-07-06 19:24:51 +00:00
2017-12-15 21:32:25 +00:00
const typename ColumnVector < T > : : Container & src_data = typeid_cast < const ColumnVector < T > & > ( * data ) . getData ( ) ;
const Offsets & src_offsets = getOffsets ( ) ;
2015-07-06 19:24:51 +00:00
2020-03-23 02:12:31 +00:00
typename ColumnVector < T > : : Container & res_data = typeid_cast < ColumnVector < T > & > ( res_arr . getData ( ) ) . getData ( ) ;
Offsets & res_offsets = res_arr . getOffsets ( ) ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
res_data . reserve ( data - > size ( ) / col_size * replicate_offsets . back ( ) ) ;
res_offsets . reserve ( replicate_offsets . back ( ) ) ;
2015-07-06 19:24:51 +00:00
2017-12-15 21:32:25 +00:00
Offset prev_replicate_offset = 0 ;
Offset prev_data_offset = 0 ;
Offset current_new_offset = 0 ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < col_size ; + + i )
{
size_t size_to_replicate = replicate_offsets [ i ] - prev_replicate_offset ;
size_t value_size = src_offsets [ i ] - prev_data_offset ;
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
for ( size_t j = 0 ; j < size_to_replicate ; + + j )
{
current_new_offset + = value_size ;
res_offsets . push_back ( current_new_offset ) ;
2015-07-06 19:24:51 +00:00
2018-09-02 04:30:55 +00:00
if ( value_size )
{
res_data . resize ( res_data . size ( ) + value_size ) ;
memcpy ( & res_data [ res_data . size ( ) - value_size ] , & src_data [ prev_data_offset ] , value_size * sizeof ( T ) ) ;
}
2017-04-01 07:20:54 +00:00
}
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
prev_replicate_offset = replicate_offsets [ i ] ;
prev_data_offset = src_offsets [ i ] ;
}
2015-07-06 19:24:51 +00:00
2017-04-01 07:20:54 +00:00
return res ;
2015-07-06 19:24:51 +00:00
}
2018-03-20 14:17:09 +00:00
/// Replicate each array the number of times given by `replicate_offsets` (cumulative counts),
/// specialized for arrays of strings: string offsets are rebuilt per copy, characters are bulk-copied.
ColumnPtr ColumnArray::replicateString(const Offsets & replicate_offsets) const
{
    size_t col_size = size();
    if (col_size != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    MutableColumnPtr res = cloneEmpty();

    if (0 == col_size)
        return res;

    ColumnArray & res_arr = assert_cast<ColumnArray &>(*res);

    /// The nested type is guaranteed by the dispatch in ColumnArray::replicate,
    /// so use assert_cast, consistent with the other replicate* methods (was typeid_cast).
    const ColumnString & src_string = assert_cast<const ColumnString &>(*data);
    const ColumnString::Chars & src_chars = src_string.getChars();
    const Offsets & src_string_offsets = src_string.getOffsets();
    const Offsets & src_offsets = getOffsets();

    /// Cast the result's nested column once instead of twice.
    ColumnString & res_string = assert_cast<ColumnString &>(res_arr.getData());
    ColumnString::Chars & res_chars = res_string.getChars();
    Offsets & res_string_offsets = res_string.getOffsets();
    Offsets & res_offsets = res_arr.getOffsets();

    res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back());
    res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back());
    res_offsets.reserve(replicate_offsets.back());

    Offset prev_replicate_offset = 0;

    Offset prev_src_offset = 0;
    Offset prev_src_string_offset = 0;

    Offset current_res_offset = 0;
    Offset current_res_string_offset = 0;

    for (size_t i = 0; i < col_size; ++i)
    {
        /// How many times to replicate the array.
        size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset;
        /// The number of strings in the array.
        size_t value_size = src_offsets[i] - prev_src_offset;
        /// Number of characters in strings of the array, including zero bytes.
        size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] - prev_src_string_offset; /// -1th index is Ok, see PaddedPODArray.

        for (size_t j = 0; j < size_to_replicate; ++j)
        {
            current_res_offset += value_size;
            res_offsets.push_back(current_res_offset);

            size_t prev_src_string_offset_local = prev_src_string_offset;
            for (size_t k = 0; k < value_size; ++k)
            {
                /// Size of single string.
                size_t chars_size = src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;

                current_res_string_offset += chars_size;
                res_string_offsets.push_back(current_res_string_offset);

                prev_src_string_offset_local += chars_size;
            }

            if (sum_chars_size)
            {
                /// Copies the characters of the array of strings.
                res_chars.resize(res_chars.size() + sum_chars_size);
                memcpySmallAllowReadWriteOverflow15(
                    &res_chars[res_chars.size() - sum_chars_size], &src_chars[prev_src_string_offset], sum_chars_size);
            }
        }

        prev_replicate_offset = replicate_offsets[i];
        prev_src_offset = src_offsets[i];
        prev_src_string_offset += sum_chars_size;
    }

    return res;
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::replicateConst(const Offsets & replicate_offsets) const
{
    size_t num_rows = size();
    if (num_rows != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (0 == num_rows)
        return cloneEmpty();

    const Offsets & src_offsets = getOffsets();

    /// Only the offsets need to be materialized; build them directly.
    auto res_column_offsets = ColumnOffsets::create();
    Offsets & res_offsets = res_column_offsets->getData();
    res_offsets.reserve(replicate_offsets.back());

    Offset prev_replicate_offset = 0;
    Offset prev_data_offset = 0;
    Offset current_new_offset = 0;

    for (size_t i = 0; i < num_rows; ++i)
    {
        const size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset;
        const size_t value_size = src_offsets[i] - prev_data_offset;

        for (size_t j = 0; j < size_to_replicate; ++j)
        {
            current_new_offset += value_size;
            res_offsets.push_back(current_new_offset);
        }

        prev_replicate_offset = replicate_offsets[i];
        prev_data_offset = src_offsets[i];
    }

    /// The nested data is constant, so a resized clone of it matches the new offsets.
    return ColumnArray::create(getData().cloneResized(current_new_offset), std::move(res_column_offsets));
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::replicateGeneric(const Offsets & replicate_offsets) const
{
    size_t num_rows = size();
    if (num_rows != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    MutableColumnPtr res = cloneEmpty();
    ColumnArray & res_concrete = assert_cast<ColumnArray &>(*res);

    if (0 == num_rows)
        return res;

    /// Generic fallback: insert each row the required number of times, one by one.
    IColumn::Offset prev_offset = 0;
    for (size_t i = 0; i < num_rows; ++i)
    {
        const size_t repeat_count = replicate_offsets[i] - prev_offset;
        prev_offset = replicate_offsets[i];

        for (size_t j = 0; j < repeat_count; ++j)
            res_concrete.insertFrom(*this, i);
    }

    return res;
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::replicateNullable(const Offsets & replicate_offsets) const
{
    const ColumnNullable & nullable = assert_cast<const ColumnNullable &>(*data);

    /// Replicate the nested values and the null map as two independent Array columns
    /// (both sharing our offsets), then reassemble Array(Nullable(T)).
    /// NOTE The offsets are calculated twice, which is redundant.
    ColumnPtr replicated_values = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable())
            .replicate(replicate_offsets);
    ColumnPtr replicated_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable())
            .replicate(replicate_offsets);

    const auto & values_arr = assert_cast<const ColumnArray &>(*replicated_values);
    const auto & null_map_arr = assert_cast<const ColumnArray &>(*replicated_null_map);

    return ColumnArray::create(
        ColumnNullable::create(
            values_arr.getDataPtr(),
            null_map_arr.getDataPtr()),
        values_arr.getOffsetsPtr());
}
2018-03-20 14:17:09 +00:00
ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
{
    const ColumnTuple & tuple = assert_cast<const ColumnTuple &>(*data);

    const size_t tuple_size = tuple.tupleSize();
    if (tuple_size == 0)
        throw Exception("Logical error: empty tuple", ErrorCodes::LOGICAL_ERROR);

    /// Replicate each tuple element as a temporary Array column sharing our offsets
    /// (the same approach as for Nullable), then collect the results back into Array(Tuple(...)).
    Columns replicated_element_columns(tuple_size);
    ColumnPtr first_replicated_array;

    for (size_t i = 0; i < tuple_size; ++i)
    {
        ColumnPtr replicated = ColumnArray(tuple.getColumns()[i]->assumeMutable(), getOffsetsPtr()->assumeMutable())
                .replicate(replicate_offsets);

        if (i == 0)
            first_replicated_array = replicated;

        replicated_element_columns[i] = assert_cast<const ColumnArray &>(*replicated).getDataPtr();
    }

    /// All elements were replicated with the same counts, so their offsets coincide; reuse those of the first.
    return ColumnArray::create(
        ColumnTuple::create(replicated_element_columns),
        assert_cast<const ColumnArray &>(*first_replicated_array).getOffsetsPtr());
}
2017-07-06 13:54:55 +00:00
/// Double dispatch: the gatherer pulls rows from its source columns and appends them into this column.
void ColumnArray : : gather ( ColumnGathererStream & gatherer )
{
gatherer . gather ( * this ) ;
}
2015-07-06 19:24:51 +00:00
}