2011-08-22 08:43:52 +00:00
# pragma once
2010-03-12 18:25:35 +00:00
2019-04-19 20:21:17 +00:00
# include <Common/COW.h>
2019-10-07 18:56:03 +00:00
# include <Common/PODArray_fwd.h>
2017-04-01 09:19:00 +00:00
# include <Common/Exception.h>
2019-06-27 18:50:20 +00:00
# include <Common/typeid_cast.h>
2021-10-02 07:13:14 +00:00
# include <base/StringRef.h>
2023-03-22 01:33:10 +00:00
# include <Core/TypeId.h>
2010-03-12 18:25:35 +00:00
2022-09-28 13:29:29 +00:00
# include "config.h"
2022-02-09 16:32:52 +00:00
2011-08-22 08:43:52 +00:00
2016-07-10 15:58:58 +00:00
class SipHash ;
2020-10-29 11:24:01 +00:00
class Collator ;
2016-07-10 15:58:58 +00:00
2022-02-09 16:32:52 +00:00
/// Forward declarations of LLVM types, so that signatures in this header
/// (e.g. compileComparator under USE_EMBEDDED_COMPILER) can mention them
/// by pointer/reference without including any LLVM header here.
namespace llvm
{
class LLVMContext ;
class Value ;
class IRBuilderBase ;
}
2010-03-12 18:25:35 +00:00
namespace DB
{
2016-01-11 21:46:36 +00:00
/// Error codes referenced by this header. They are only declared here (extern);
/// the definitions live elsewhere.
namespace ErrorCodes
{
extern const int CANNOT_GET_SIZE_OF_FIELD ;
/// Thrown by the default implementations of optional methods (get64, getUInt, cloneResized, ...).
extern const int NOT_IMPLEMENTED ;
2020-10-29 11:24:01 +00:00
/// Thrown by the default implementations of the *WithCollation methods.
extern const int BAD_COLLATION ;
2016-01-11 21:46:36 +00:00
}
2015-10-04 03:17:36 +00:00
class Arena ;
2017-07-06 13:54:55 +00:00
class ColumnGathererStream ;
2019-09-27 13:44:33 +00:00
class Field ;
2020-03-13 17:31:50 +00:00
class WeakHash32 ;
2011-08-22 08:43:52 +00:00
2020-05-25 14:33:31 +00:00
/*
* Represents a set of equal ranges in previous column to perform sorting in current column .
* Used in sorting by tuples .
* */
/// Each element is a pair of size_t positions describing one range of equal values.
/// The scope-resolution operator must be written `::` without spaces; `: :` is ill-formed.
using EqualRanges = std::vector<std::pair<size_t, size_t>>;
2020-05-12 00:58:58 +00:00
2017-02-17 17:39:02 +00:00
/// Declares interface to store columns in memory.
2019-04-19 20:21:17 +00:00
class IColumn : public COW < IColumn >
2010-03-12 18:25:35 +00:00
{
2017-12-13 01:27:53 +00:00
private :
2019-04-19 20:21:17 +00:00
friend class COW < IColumn > ;
2017-12-13 01:27:53 +00:00
/// Creates the same column with the same data.
2019-04-19 20:21:17 +00:00
/// This is internal method to use from COW.
2017-12-19 01:57:06 +00:00
/// It performs shallow copy with copy-ctor and not useful from outside.
/// If you want to copy column for modification, look at 'mutate' method.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual MutablePtr clone ( ) const = 0 ;
2017-12-13 01:27:53 +00:00
2017-12-19 01:53:54 +00:00
public :
2017-02-17 17:39:02 +00:00
/// Name of a Column. It is used in info messages.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual std : : string getName ( ) const { return getFamilyName ( ) ; }
2017-12-07 22:11:51 +00:00
/// Name of a Column kind, without parameters (example: FixedString, Array).
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual const char * getFamilyName ( ) const = 0 ;
2017-04-01 07:20:54 +00:00
2020-06-04 22:02:59 +00:00
/// Type of data that column contains. It's an underlying type: UInt16 for Date, UInt32 for DateTime, so on.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual TypeIndex getDataType ( ) const = 0 ;
2020-06-04 22:02:59 +00:00
2020-04-16 22:28:08 +00:00
/** If column isn't constant, returns itself.
2019-01-22 19:56:53 +00:00
* If column is constant , transforms constant to full column ( if column type allows such transform ) and return it .
2015-10-28 21:21:18 +00:00
*/
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr convertToFullColumnIfConst() const
{
    /// Default: nothing to convert, so the result of getPtr() is returned as is.
    return getPtr();
}
2017-04-01 07:20:54 +00:00
2018-09-27 15:55:22 +00:00
/// If column isn't ColumnLowCardinality, return itself.
2021-05-21 00:57:11 +00:00
/// If column is ColumnLowCardinality, transforms it to full column.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr convertToFullColumnIfLowCardinality() const
{
    /// Default: nothing to convert, so the result of getPtr() is returned as is.
    return getPtr();
}
2018-06-06 13:43:16 +00:00
2021-05-21 00:57:11 +00:00
/// If column isn't ColumnSparse, return itself.
/// If column is ColumnSparse, transforms it to full column.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr convertToFullColumnIfSparse() const
{
    /// Default: nothing to convert, so the result of getPtr() is returned as is.
    return getPtr();
}
2021-03-12 16:33:41 +00:00
2022-04-18 02:22:40 +00:00
[[nodiscard]] Ptr convertToFullIfNeeded() const
{
    /// Apply the three conversions one after another, each on the result of the previous:
    /// sparse -> const -> low cardinality.
    Ptr without_sparse = convertToFullColumnIfSparse();
    Ptr without_const = without_sparse->convertToFullColumnIfConst();
    return without_const->convertToFullColumnIfLowCardinality();
}
2017-02-17 17:39:02 +00:00
/// Creates empty column with the same type.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual MutablePtr cloneEmpty() const
{
    /// An empty column is a clone resized to zero rows.
    constexpr size_t no_rows = 0;
    return cloneResized(no_rows);
}
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Creates column with the same type and specified size.
2022-11-01 09:46:44 +00:00
/// If size is less than current size, then data is cut.
/// If size is greater, then default values are appended.
2022-05-31 00:10:47 +00:00
[[nodiscard]] virtual MutablePtr cloneResized(size_t /*size*/) const
{
    /// Default for column types that do not provide a resizing clone: always throws NOT_IMPLEMENTED.
    /// The placeholder is spelled "{}" like in every other message of this class;
    /// "{ }" is not a valid replacement field and getName() would not be substituted.
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, " Cannot cloneResized() column {} ", getName());
}
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Returns number of values in column.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual size_t size ( ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Returns true if the column contains no values.
2022-04-18 02:22:40 +00:00
[[nodiscard]] bool empty() const
{
    const size_t rows = size();
    return rows == 0;
}
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Returns value of n-th element in universal Field representation.
/// Is used in rare cases, since creation of Field instance is expensive usually.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Field operator [ ] ( size_t n ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Like the previous one, but avoids extra copying if Field is in a container, for example.
2013-01-07 06:47:15 +00:00
virtual void get ( size_t n , Field & res ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// If possible, returns pointer to memory chunk which contains n-th element (if it isn't possible, throws an exception)
/// Is used to optimize some computations (in aggregation, for example).
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual StringRef getDataAt ( size_t n ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// If column stores integers, it returns n-th element transformed to UInt64 using static_cast.
2018-12-04 12:49:21 +00:00
/// If column stores floating point numbers, bits of n-th elements are copied to lower bits of UInt64, the remaining bits are zeros.
2017-02-17 17:39:02 +00:00
/// Is used to optimize some computations (in aggregation, for example).
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual UInt64 get64(size_t /*n*/) const
{
    /// Unsupported unless a concrete column overrides this method.
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        " Method get64 is not supported for {} ",
        getName());
}
2017-04-01 07:20:54 +00:00
2019-05-30 21:59:40 +00:00
/// If column stores native numeric type, it returns n-th element casted to Float64
/// Is used in regression methods to cast each features into uniform type
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Float64 getFloat64(size_t /*n*/) const
{
    /// Unsupported unless a concrete column overrides this method.
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        " Method getFloat64 is not supported for {} ",
        getName());
}
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Float32 getFloat32(size_t /*n*/) const
{
    /// Unsupported unless a concrete column overrides this method.
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        " Method getFloat32 is not supported for {} ",
        getName());
}
2017-07-24 07:48:26 +00:00
/** If column is numeric, return value of n-th element, casted to UInt64.
2018-10-13 14:33:43 +00:00
* For NULL values of Nullable column it is allowed to return arbitrary value .
2017-07-24 06:32:02 +00:00
* Otherwise throw an exception .
*/
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual UInt64 getUInt(size_t /*n*/) const
{
    /// Unsupported unless a concrete column overrides this method.
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        " Method getUInt is not supported for {} ",
        getName());
}
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Int64 getInt(size_t /*n*/) const
{
    /// Unsupported unless a concrete column overrides this method.
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        " Method getInt is not supported for {} ",
        getName());
}
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool isDefaultAt ( size_t n ) const = 0 ;
[[nodiscard]] virtual bool isNullAt(size_t /*n*/) const
{
    /// Default: no row is reported as NULL.
    return false;
}
2017-12-10 22:44:04 +00:00
2018-05-06 11:29:17 +00:00
/** If column is numeric, return value of n-th element, casted to bool.
* For NULL values of Nullable column returns false .
* Otherwise throw an exception .
*/
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool getBool ( size_t /*n*/ ) const
2018-05-06 11:29:17 +00:00
{
2023-01-23 21:13:58 +00:00
throw Exception ( ErrorCodes : : NOT_IMPLEMENTED , " Method getBool is not supported for {} " , getName ( ) ) ;
2018-05-06 11:29:17 +00:00
}
2017-02-17 17:39:02 +00:00
/// Removes all elements outside of specified range.
/// Is used in LIMIT operation, for example.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr cut(size_t start, size_t length) const
{
    /// Build the result in a fresh empty column of the same type and append
    /// `length` elements of this column, beginning at position `start`.
    MutablePtr res = cloneEmpty();
    res->insertRangeFrom(*this, start, length);

    /// Deliberately a plain `return res;` with no std::move.
    /// `return std::move(w);` means "use the move constructor or fail to compile" and prohibits RVO,
    /// whereas `return w;` lets the compiler use RVO, then move, then copy, in that order of preference.
    /// (std::move on return is only appropriate for rvalue-reference parameters,
    /// which are not eligible for NRVO.)
    return res;
}
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Appends new value at the end of column (column's size is increased by 1).
/// Is used to transform raw strings to Blocks (for example, inside input format parsers)
2010-05-20 19:29:04 +00:00
virtual void insert ( const Field & x ) = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Appends n-th element from other column with the same type.
/// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation.
2019-09-27 13:44:33 +00:00
virtual void insertFrom ( const IColumn & src , size_t n ) ;
2017-04-01 07:20:54 +00:00
2018-08-29 12:10:18 +00:00
/// Appends range of elements from other column with the same type.
2017-02-17 17:39:02 +00:00
/// Could be used to concatenate columns.
2015-11-29 17:06:30 +00:00
virtual void insertRangeFrom ( const IColumn & src , size_t start , size_t length ) = 0 ;
2017-04-01 07:20:54 +00:00
2019-09-18 18:44:44 +00:00
/// Appends one element from other column with the same type multiple times.
virtual void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
    /// Generic fallback: append element `position` of `src` to this column `length` times,
    /// one insertFrom call per copy.
    /// The counter must be advanced with `++i`; `+ + i` is two unary pluses and never increments.
    for (size_t i = 0; i < length; ++i)
        insertFrom(src, position);
}
2021-09-16 13:57:45 +00:00
/// Appends one field multiple times. Can be optimized in inherited classes.
virtual void insertMany(const Field & field, size_t length)
{
    /// Generic fallback: append `field` to this column `length` times, one insert call per copy.
    /// The counter must be advanced with `++i`; `+ + i` is two unary pluses and never increments.
    for (size_t i = 0; i < length; ++i)
        insert(field);
}
2017-02-17 17:39:02 +00:00
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
/// Is used to optimize some computations (in aggregation, for example).
2017-12-09 10:14:45 +00:00
/// Parameter length could be ignored if column values have fixed size.
2019-05-23 13:35:26 +00:00
/// All data will be inserted as single element
2013-02-16 20:15:45 +00:00
virtual void insertData ( const char * pos , size_t length ) = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Appends "default value".
/// Is used when there is a need to increase the column size, but inserting a value doesn't make sense.
/// For example, ColumnNullable(Nested) absolutely ignores values of nested column if it is marked as NULL.
2010-05-20 19:29:04 +00:00
virtual void insertDefault ( ) = 0 ;
2017-04-01 07:20:54 +00:00
2019-09-18 18:44:44 +00:00
/// Appends "default value" multiple times.
virtual void insertManyDefaults(size_t length)
{
    /// Generic fallback: call insertDefault() `length` times.
    /// The counter must be advanced with `++i`; `+ + i` is two unary pluses and never increments.
    for (size_t i = 0; i < length; ++i)
        insertDefault();
}
2017-02-17 17:39:02 +00:00
/** Removes last n elements.
2019-01-22 19:56:53 +00:00
* Is used to support exception - safety of several operations .
2017-02-17 17:39:02 +00:00
* For example , sometimes insertion should be reverted if we catch an exception during operation processing .
* If column has less than n elements or n = = 0 - undefined behavior .
2016-02-16 16:39:39 +00:00
*/
virtual void popBack ( size_t n ) = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/** Serializes n-th element. Serialized element should be placed continuously inside Arena's memory.
* Serialized value can be deserialized to reconstruct original object . Is used in aggregation .
* The method is similar to getDataAt ( ) , but can work when element ' s value cannot be mapped to existing continuous memory chunk ,
* For example , to obtain unambiguous representation of Array of strings , strings data should be interleaved with their sizes .
* Parameter begin should be used with Arena : : allocContinue .
2015-10-04 03:17:36 +00:00
*/
2023-07-05 05:53:12 +00:00
virtual StringRef serializeValueIntoArena ( size_t n , Arena & arena , char const * & begin , const UInt8 * null_bit = nullptr ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Deserializes a value that was serialized using IColumn::serializeValueIntoArena method.
/// Returns pointer to the position after the read data.
2015-10-04 03:17:36 +00:00
virtual const char * deserializeAndInsertFromArena ( const char * pos ) = 0 ;
2017-04-01 07:20:54 +00:00
2021-02-16 21:26:06 +00:00
/// Skip previously serialized value that was serialized using IColumn::serializeValueIntoArena method.
2021-02-17 21:42:51 +00:00
/// Returns a pointer to the position after the deserialized data.
2021-02-16 21:26:06 +00:00
virtual const char * skipSerializedInArena ( const char * ) const = 0 ;
2017-02-17 17:39:02 +00:00
/// Update state of hash function with value of n-th element.
2018-10-13 14:33:43 +00:00
/// On subsequent calls of this method for sequence of column values of arbitrary types,
2017-02-17 17:39:02 +00:00
/// passed bytes to hash must identify sequence of values unambiguously.
2016-07-10 15:58:58 +00:00
virtual void updateHashWithValue ( size_t n , SipHash & hash ) const = 0 ;
2017-04-01 07:20:54 +00:00
2020-03-13 17:31:50 +00:00
/// Update hash function value. Hash is calculated for each element.
/// It's a fast weak hash function. Mainly need to scatter data between threads.
/// WeakHash32 must have the same size as column.
virtual void updateWeakHash32 ( WeakHash32 & hash ) const = 0 ;
2020-06-03 13:27:54 +00:00
/// Update state of hash with all column.
2020-05-20 22:16:08 +00:00
virtual void updateHashFast ( SipHash & hash ) const = 0 ;
2017-02-17 17:39:02 +00:00
/** Removes elements that don't match the filter.
* Is used in WHERE and HAVING operations .
* If result_size_hint > 0 , then makes advance reserve ( result_size_hint ) for the result column ;
2021-05-18 13:05:55 +00:00
  * if 0, then no reserve() is made;
  * otherwise (i.e. < 0), makes reserve() using the size of the source column.
2011-08-22 08:43:52 +00:00
*/
2016-05-28 10:35:44 +00:00
using Filter = PaddedPODArray < UInt8 > ;
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr filter ( const Filter & filt , ssize_t result_size_hint ) const = 0 ;
2017-04-01 07:20:54 +00:00
2021-05-18 13:05:55 +00:00
/** Expand column by mask inplace. After expanding column will
* satisfy the following : if we filter it by given mask , we will
* get initial column . Values with indexes i : mask [ i ] = 0
* shouldn ' t be used after expanding .
2021-06-07 10:55:55 +00:00
* If inverted is true , inverted mask will be used .
2021-05-18 13:05:55 +00:00
*/
2021-06-07 10:55:55 +00:00
virtual void expand ( const Filter & /*mask*/ , bool /*inverted*/ ) = 0 ;
2021-04-27 12:49:58 +00:00
2020-08-08 00:47:03 +00:00
/// Permutes elements using specified permutation. Is used in sorting.
2017-02-17 17:39:02 +00:00
/// limit - if it isn't 0, puts only first limit elements in the result.
2016-05-28 10:35:44 +00:00
using Permutation = PaddedPODArray < size_t > ;
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr permute ( const Permutation & perm , size_t limit ) const = 0 ;
2017-04-01 07:20:54 +00:00
2018-04-18 21:00:47 +00:00
/// Creates new column with values column[indexes[:limit]]. If limit is 0, all indexes are used.
/// Indexes must be one of the ColumnUInt. For default implementation, see selectIndexImpl from ColumnsCommon.h
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr index ( const IColumn & indexes , size_t limit ) const = 0 ;
2018-04-18 21:00:47 +00:00
2018-08-29 12:10:18 +00:00
/** Compares (*this)[n] and rhs[m]. Column rhs should have the same type.
2017-02-17 17:39:02 +00:00
  * Returns a negative number, 0, or a positive number if (*this)[n] is less than, equal to, or greater than rhs[m], respectively.
2020-08-08 00:47:03 +00:00
* Is used in sorting .
2013-11-01 20:10:43 +00:00
*
2017-03-12 12:56:59 +00:00
* If one of element ' s value is NaN or NULLs , then :
  * - if nan_direction_hint == -1, NaN and NULLs are considered less than everything else;
  * - if nan_direction_hint == 1, NaN and NULLs are considered greater than everything else.
* For example , if nan_direction_hint = = - 1 is used by descending sorting , NaNs will be at the end .
2013-11-01 20:10:43 +00:00
*
2017-03-12 12:56:59 +00:00
* For non Nullable and non floating point types , nan_direction_hint is ignored .
2011-09-04 00:22:19 +00:00
*/
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual int compareAt ( size_t n , size_t m , const IColumn & rhs , int nan_direction_hint ) const = 0 ;
2017-04-01 07:20:54 +00:00
2022-02-09 16:32:52 +00:00
# if USE_EMBEDDED_COMPILER
2022-05-11 13:35:37 +00:00
[[nodiscard]] virtual bool isComparatorCompilable() const
{
    /// Default: the column does not offer a compiled comparator.
    return false;
}
2022-02-09 16:32:52 +00:00
2022-05-11 13:35:37 +00:00
[[nodiscard]] virtual llvm::Value * compileComparator(
    llvm::IRBuilderBase & /*builder*/,
    llvm::Value * /*lhs*/,
    llvm::Value * /*rhs*/,
    llvm::Value * /*nan_direction_hint*/) const
{
    /// Unsupported unless a concrete column overrides this method.
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        " Method compileComparator is not supported for {} ",
        getName());
}
# endif
2020-10-29 11:24:01 +00:00
/// Equivalent to compareAt, but collator is used to compare values.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual int compareAtWithCollation ( size_t , size_t , const IColumn & , int , const Collator & ) const
2020-10-29 11:24:01 +00:00
{
2023-01-23 21:13:58 +00:00
throw Exception ( ErrorCodes : : BAD_COLLATION ,
" Collations could be specified only for String, LowCardinality(String), Nullable(String) "
" or for Array or Tuple, containing it. " ) ;
2020-10-29 11:24:01 +00:00
}
2020-06-17 11:43:55 +00:00
/// Compare the whole column with single value from rhs column.
/// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare.
/// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction
/// row_indexes (if not ignored) will contain row numbers for which compare result is 0
/// see compareImpl for default implementation.
2020-06-02 00:23:41 +00:00
virtual void compareColumn ( const IColumn & rhs , size_t rhs_row_num ,
2020-06-17 11:43:55 +00:00
PaddedPODArray < UInt64 > * row_indexes , PaddedPODArray < Int8 > & compare_results ,
2020-06-02 00:23:41 +00:00
int direction , int nan_direction_hint ) const = 0 ;
2020-06-01 12:10:32 +00:00
2021-03-01 19:26:20 +00:00
/// Check if all elements in the column have equal values. Return true if column is empty.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool hasEqualValues ( ) const = 0 ;
2021-02-26 04:50:04 +00:00
2022-02-23 17:34:19 +00:00
/// Sort direction requested from getPermutation / updatePermutation.
enum class PermutationSortDirection : uint8_t
{
    Ascending = 0,
    Descending = 1,
};
/// Stability of the permutation requested from getPermutation / updatePermutation.
enum class PermutationSortStability : uint8_t
{
    Unstable = 0,
    Stable = 1,
};
2017-02-17 17:39:02 +00:00
/** Returns a permutation that sorts elements of this column,
* i . e . perm [ i ] - th element of source column should be i - th element of sorted column .
2022-03-11 21:16:25 +00:00
* direction - permutation direction .
* stability - stability of result permutation .
2017-02-17 17:39:02 +00:00
* limit - if isn ' t 0 , then only first limit elements of the result column could be sorted .
2017-03-12 12:56:59 +00:00
* nan_direction_hint - see above .
2011-09-26 11:05:38 +00:00
*/
2022-02-23 17:34:19 +00:00
virtual void getPermutation ( PermutationSortDirection direction , PermutationSortStability stability ,
size_t limit , int nan_direction_hint , Permutation & res ) const = 0 ;
2017-04-01 07:20:54 +00:00
2020-05-25 14:33:31 +00:00
/* In updatePermutation we pass the current permutation and the intervals within which it should be sorted.
 * Then, for each interval separately (except for the last one, if there is a limit),
 * we sort it based on the data of the current column and find all the sub-intervals within this
 * interval that have equal values in this column. We cannot tell in what order such equal values
 * should go, so we form a new array of intervals that still need to be sorted.
 * If there is a limit, then for the last interval we do partial sorting and everything described above,
 * but in addition we find all the elements equal to the largest sorted one; they will also need to be sorted.
 */
2022-02-23 17:34:19 +00:00
virtual void updatePermutation ( PermutationSortDirection direction , PermutationSortStability stability ,
size_t limit , int nan_direction_hint , Permutation & res , EqualRanges & equal_ranges ) const = 0 ;
2020-05-25 14:33:31 +00:00
2020-10-29 11:24:01 +00:00
/** Equivalent to getPermutation and updatePermutation but collator is used to compare values.
* Supported for String , LowCardinality ( String ) , Nullable ( String ) and for Array and Tuple , containing them .
*/
2022-02-23 17:34:19 +00:00
virtual void getPermutationWithCollation ( const Collator & /*collator*/ , PermutationSortDirection /*direction*/ , PermutationSortStability /*stability*/ ,
size_t /*limit*/ , int /*nan_direction_hint*/ , Permutation & /*res*/ ) const
2020-10-29 11:24:01 +00:00
{
2023-01-23 21:13:58 +00:00
throw Exception ( ErrorCodes : : BAD_COLLATION ,
" Collations could be specified only for String, LowCardinality(String), Nullable(String) "
" or for Array or Tuple, containing them. " ) ;
2020-10-29 11:24:01 +00:00
}
2022-02-23 17:34:19 +00:00
virtual void updatePermutationWithCollation ( const Collator & /*collator*/ , PermutationSortDirection /*direction*/ , PermutationSortStability /*stability*/ ,
size_t /*limit*/ , int /*nan_direction_hint*/ , Permutation & /*res*/ , EqualRanges & /*equal_ranges*/ ) const
2020-10-29 11:24:01 +00:00
{
2023-01-23 21:13:58 +00:00
throw Exception ( ErrorCodes : : BAD_COLLATION ,
" Collations could be specified only for String, LowCardinality(String), Nullable(String) "
" or for Array or Tuple, containing them. " ) ;
2020-10-29 11:24:01 +00:00
}
2017-02-17 17:39:02 +00:00
/** Copies each element according to the offsets parameter.
* ( i - th element should be copied offsets [ i ] - offsets [ i - 1 ] times . )
* It is necessary in ARRAY JOIN operation .
2012-08-27 05:13:14 +00:00
*/
2017-12-15 21:32:25 +00:00
using Offset = UInt64 ;
using Offsets = PaddedPODArray < Offset > ;
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr replicate ( const Offsets & offsets ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-11 20:20:57 +00:00
/** Split column to smaller columns. Each value goes to column index, selected by corresponding element of 'selector'.
* Selector must contain values from 0 to num_columns - 1.
* For default implementation , see scatterImpl .
*/
using ColumnIndex = UInt64 ;
using Selector = PaddedPODArray < ColumnIndex > ;
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual std : : vector < MutablePtr > scatter ( ColumnIndex num_columns , const Selector & selector ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-07-06 13:54:55 +00:00
/// Insert data from several other columns according to source mask (used in vertical merge).
/// For now it is a helper to de-virtualize calls to insert*() functions inside gather loop
/// (descendants should call gatherer_stream.gather(*this) to implement this function.)
/// TODO: interface decoupled from ColumnGathererStream that allows non-generic specializations.
virtual void gather ( ColumnGathererStream & gatherer_stream ) = 0 ;
2017-02-17 17:39:02 +00:00
/** Computes minimum and maximum element of the column.
2019-01-22 19:56:53 +00:00
* In addition to numeric types , the function is completely implemented for Date and DateTime .
* For strings and arrays function should return default value .
2017-02-17 17:39:02 +00:00
* ( except for constant columns ; they should return value of the constant ) .
* If column is empty function should return default value .
2013-09-06 20:28:22 +00:00
*/
2016-08-10 19:12:29 +00:00
virtual void getExtremes ( Field & min , Field & max ) const = 0 ;
2017-04-01 07:20:54 +00:00
2017-02-17 17:39:02 +00:00
/// Reserves memory for specified amount of elements. If reservation isn't possible, does nothing.
/// It affects performance only (not correctness).
2018-06-03 16:51:31 +00:00
virtual void reserve(size_t /*n*/)
{
    /// Intentionally empty: the default does nothing.
}
2017-04-01 07:20:54 +00:00
2023-05-22 18:40:35 +00:00
/// Requests the removal of unused capacity.
/// It is a non-binding request to reduce the capacity of the underlying container to its size.
2023-05-24 14:20:34 +00:00
virtual MutablePtr shrinkToFit() const
{
    /// This column itself is left untouched (the method is const).
    /// The default result is a clone resized to the current number of rows.
    const size_t rows = size();
    return cloneResized(rows);
}
2022-03-05 12:50:47 +00:00
/// If we have another column as a source (owner of data), copy all data to ourself and reset source.
virtual void ensureOwnership()
{
    /// Intentionally empty: the default does nothing.
}
2017-02-17 17:39:02 +00:00
/// Size of column data in memory (may be approximate) - for profiling. Zero, if could not be determined.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual size_t byteSize ( ) const = 0 ;
2017-04-01 07:20:54 +00:00
2021-01-02 22:58:10 +00:00
/// Size of single value in memory (for accounting purposes)
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual size_t byteSizeAt ( size_t /*n*/ ) const = 0 ;
2021-01-02 22:58:10 +00:00
2017-02-17 17:39:02 +00:00
/// Size of memory, allocated for column.
/// This is greater or equals to byteSize due to memory reservation in containers.
2018-11-12 17:44:43 +00:00
/// Zero, if could not be determined.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual size_t allocatedBytes ( ) const = 0 ;
2017-04-01 07:20:54 +00:00
2019-03-10 03:16:51 +00:00
/// Make memory region readonly with mprotect if it is large enough.
/// The operation is slow and performed only for debug builds.
virtual void protect()
{
    /// Intentionally empty: the default does nothing.
}
2017-12-10 21:05:21 +00:00
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
/// Shallow: doesn't do recursive calls; don't do call for itself.
2022-11-16 00:46:57 +00:00
using MutableColumnCallback = std : : function < void ( WrappedPtr & ) > ;
2023-06-16 07:16:50 +00:00
virtual void forEachSubcolumn(MutableColumnCallback /*callback*/)
{
    /// Default: the callback is never invoked.
}
/// Default implementation calls the mutable overload using const_cast.
using ColumnCallback = std : : function < void ( const WrappedPtr & ) > ;
virtual void forEachSubcolumn ( ColumnCallback ) const ;
2017-12-07 12:09:55 +00:00
2022-09-14 18:01:49 +00:00
/// Similar to forEachSubcolumn, but it also does recursive calls.
2022-11-16 00:46:57 +00:00
/// In recursive calls it's prohibited to replace pointers
/// to subcolumns, so we use another callback function.
using RecursiveMutableColumnCallback = std : : function < void ( IColumn & ) > ;
2023-06-16 07:16:50 +00:00
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback /*callback*/)
{
    /// Default: the callback is never invoked.
}
/// Default implementation calls the mutable overload using const_cast.
using RecursiveColumnCallback = std : : function < void ( const IColumn & ) > ;
virtual void forEachSubcolumnRecursively ( RecursiveColumnCallback ) const ;
2017-12-07 12:09:55 +00:00
2019-03-14 23:10:51 +00:00
/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool structureEquals ( const IColumn & ) const
2019-03-14 23:10:51 +00:00
{
2023-01-23 21:13:58 +00:00
throw Exception ( ErrorCodes : : NOT_IMPLEMENTED , " Method structureEquals is not supported for {} " , getName ( ) ) ;
2019-03-14 23:10:51 +00:00
}
2023-02-09 18:06:15 +00:00
/// Returns ratio of values in column, that are equal to default value of column.
2021-06-07 15:34:22 +00:00
/// Checks only @sample_ratio ratio of rows.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual double getRatioOfDefaultRows ( double sample_ratio = 1.0 ) const = 0 ; /// NOLINT
2021-03-09 17:25:23 +00:00
2023-02-09 18:06:15 +00:00
/// Returns number of values in column, that are equal to default value of column.
[[nodiscard]] virtual UInt64 getNumberOfDefaultRows ( ) const = 0 ;
2021-05-21 00:57:11 +00:00
/// Returns indices of values in column that are not equal to the default value of the column.
2021-06-07 15:34:22 +00:00
virtual void getIndicesOfNonDefaultRows ( Offsets & indices , size_t from , size_t limit ) const = 0 ;
2021-04-01 18:18:28 +00:00
2021-05-21 00:57:11 +00:00
/// Returns column with @total_rows elements.
/// In result column values from current column are at positions from @offsets.
2021-09-16 13:57:45 +00:00
/// Other values are filled by @default_field.
2021-05-21 00:57:11 +00:00
/// @shift means how much rows to skip from the beginning of current column.
/// Used to create full column from sparse.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr createWithOffsets ( const Offsets & offsets , const Field & default_field , size_t total_rows , size_t shift ) const ;
2021-04-01 18:18:28 +00:00
2021-02-07 01:41:31 +00:00
/// Compress column in memory to some representation that allows to decompress it back.
2021-02-11 21:26:14 +00:00
/// Return itself if compression is not applicable for this column type.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr compress() const
{
    /// By default there is no compression: the result of getPtr() is returned as is.
    Ptr uncompressed = getPtr();
    return uncompressed;
}
/// If it's CompressedColumn, decompress it and return.
/// Otherwise return itself.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual Ptr decompress() const
{
    /// By default there is nothing to decompress: the result of getPtr() is returned as is.
    Ptr same_column = getPtr();
    return same_column;
}
2022-05-06 14:44:00 +00:00
/// Some columns may require finalization before using of other operations.
virtual void finalize()
{
    /// Intentionally empty: the default does nothing.
}
2022-05-09 16:12:04 +00:00
virtual bool isFinalized() const
{
    /// Default: the column always reports itself as finalized.
    return true;
}
MutablePtr cloneFinalized() const
{
    /// Obtain a mutable column via mutate(), call finalize() on it and return it.
    MutablePtr result = IColumn::mutate(getPtr());
    result->finalize();
    return result;
}
2017-12-14 01:43:19 +00:00
2022-04-18 02:22:40 +00:00
/// Returns a mutable column for `ptr`, which is taken by value.
/// Steps, in order:
///  1. shallowMutate() is called on the top-level column;
///  2. the local `ptr` reference is released;
///  3. forEachSubcolumn replaces every direct subcolumn with the result of calling mutate on it,
///     so the procedure descends through the whole tree of subcolumns.
/// The statement order matters: the use_count remarks below describe the state after each step.
[[nodiscard]] static MutablePtr mutate ( Ptr ptr )
2017-12-14 01:43:19 +00:00
{
2020-05-17 10:51:52 +00:00
MutablePtr res = ptr - > shallowMutate ( ) ; /// Now use_count is 2.
ptr . reset ( ) ; /// Reset use_count to 1.
2020-05-17 20:26:53 +00:00
/// Each subcolumn pointer is overwritten in place with its mutated counterpart.
res - > forEachSubcolumn ( [ ] ( WrappedPtr & subcolumn ) { subcolumn = IColumn : : mutate ( std : : move ( subcolumn ) . detach ( ) ) ; } ) ;
2017-12-14 01:43:19 +00:00
return res ;
}
2017-12-09 17:32:18 +00:00
/** Some columns can contain another columns inside.
* So , we have a tree of columns . But not all combinations are possible .
* There are the following rules :
*
* ColumnConst may be only at top . It cannot be inside any column .
* ColumnNullable can contain only simple columns .
*/
2017-12-09 07:32:32 +00:00
/// Various properties on behaviour of column type.
2019-07-01 11:44:19 +00:00
/// True if column contains something nullable inside. It's true for ColumnNullable, can be true or false for ColumnConst, etc.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool isNullable() const
{
    /// Base implementation: answers false.
    return false;
}
2019-07-01 11:44:19 +00:00
2017-12-09 10:14:45 +00:00
/// It's a special kind of column that contains a single value, but is not a ColumnConst.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool isDummy() const
{
    /// Base implementation: answers false.
    return false;
}
2017-12-09 07:32:32 +00:00
2017-12-10 21:05:21 +00:00
/** Memory layout properties.
  *
  * Each value of a column can be placed in memory contiguously or not.
  *
  * Example: simple columns like UInt64 or FixedString store their values contiguously in a single memory buffer.
  *
  * Example: Tuple stores the values of each component in a separate subcolumn, so the values of Tuples with at least two components are not contiguous.
  * Another example is Nullable. Each value has a null flag that is stored separately, so the value is not contiguous in memory.
  *
  * There are some important cases when values are not stored contiguously, but for each value you can get a contiguous memory segment
  * that will unambiguously identify the value. In this case, the methods getDataAt and insertData are implemented.
  * Example: String column: the bytes of the strings are stored concatenated in one memory buffer
  * and the offsets into that buffer are stored in another buffer. The same holds for an Array of fixed-size contiguous elements.
  *
  * To avoid confusion between these cases, we don't have an isContiguous method.
  */
2017-12-09 10:14:45 +00:00
/// Values in column have fixed size (including the case when values span many memory segments).
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool valuesHaveFixedSize ( ) const { return isFixedAndContiguous ( ) ; }
2017-12-09 07:32:32 +00:00
2017-12-09 10:14:45 +00:00
/// Values in column are represented as a contiguous memory segment of fixed size. Implies valuesHaveFixedSize.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool isFixedAndContiguous() const
{
    /// Base implementation: answers false.
    return false;
}
2017-12-09 07:32:32 +00:00
2018-04-25 15:16:48 +00:00
/// If isFixedAndContiguous, returns the underlying data array, otherwise throws an exception.
2023-01-23 21:13:58 +00:00
/// Base implementation: always throws Exception with code NOT_IMPLEMENTED, formatting getName() into the message.
/// NOTE(review): presumably overridden by the column types for which isFixedAndContiguous() is true - confirm.
[[nodiscard]] virtual std : : string_view getRawData ( ) const { throw Exception ( ErrorCodes : : NOT_IMPLEMENTED , " Column {} is not a contiguous block of memory " , getName ( ) ) ; }
2018-04-25 15:16:48 +00:00
2017-12-09 10:14:45 +00:00
/// If valuesHaveFixedSize, returns size of value, otherwise throws an exception.
2023-01-23 21:13:58 +00:00
/// Base implementation: always throws Exception with code CANNOT_GET_SIZE_OF_FIELD, formatting getName() into the message.
/// NOTE(review): presumably overridden by the column types for which valuesHaveFixedSize() is true - confirm.
[[nodiscard]] virtual size_t sizeOfValueIfFixed ( ) const { throw Exception ( ErrorCodes : : CANNOT_GET_SIZE_OF_FIELD , " Values of column {} are not fixed size. " , getName ( ) ) ; }
2017-12-09 10:14:45 +00:00
/// Column is ColumnVector of numbers or ColumnConst of it. Note that Nullable columns are not numeric.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool isNumeric() const
{
    /// Base implementation: answers false.
    return false;
}
2017-12-09 07:32:32 +00:00
2017-12-09 10:14:45 +00:00
/// If the only value column can contain is NULL.
/// Does not imply type of object, because it can be ColumnNullable(ColumnNothing) or ColumnConst(ColumnNullable(ColumnNothing))
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool onlyNull() const
{
    /// Base implementation: answers false.
    return false;
}
2017-12-09 07:32:32 +00:00
2017-12-09 10:14:45 +00:00
/// Can be inside ColumnNullable.
2022-04-18 02:22:40 +00:00
[[nodiscard]] virtual bool canBeInsideNullable() const
{
    /// Base implementation: answers false.
    return false;
}
2017-12-09 07:32:32 +00:00
2022-04-18 02:22:40 +00:00
/// Base implementation answers false.
/// NOTE(review): presumably overridden by the low-cardinality column type - confirm.
[[nodiscard]] virtual bool lowCardinality() const
{
    return false;
}
2018-04-17 17:47:27 +00:00
2022-04-18 02:22:40 +00:00
/// Base implementation answers false.
/// NOTE(review): presumably overridden by the sparse column type - confirm.
[[nodiscard]] virtual bool isSparse() const
{
    return false;
}
2021-03-31 01:08:27 +00:00
2022-04-18 02:22:40 +00:00
/// Base implementation answers false.
/// NOTE(review): presumably overridden by columns that can be compared using a Collator - confirm.
[[nodiscard]] virtual bool isCollationSupported() const
{
    return false;
}
2020-10-29 11:24:01 +00:00
2019-01-04 12:10:00 +00:00
/// Virtual destructor: IColumn declares virtual methods that derived column classes override.
virtual ~ IColumn ( ) = default ;
/// Default construction is kept available explicitly, since a copy constructor is declared below.
IColumn ( ) = default ;
/// Copy construction stays compiler-generated.
/// NOTE(review): the comments at the top of the class say COW relies on the copy-ctor for a shallow copy - confirm.
IColumn ( const IColumn & ) = default ;
2017-02-11 20:20:57 +00:00
2017-12-10 21:05:21 +00:00
/** Print column name, size, and recursively print all subcolumns.
*/
2022-04-18 02:22:40 +00:00
[[nodiscard]] String dumpStructure ( ) const ;
2017-12-07 22:11:51 +00:00
2017-02-11 20:20:57 +00:00
protected :
/// Template is to devirtualize calls to insertFrom method.
/// In derived classes (that use final keyword), implement scatter method as call to scatterImpl.
template < typename Derived >
2019-10-07 18:56:03 +00:00
std : : vector < MutablePtr > scatterImpl ( ColumnIndex num_columns , const Selector & selector ) const ;
2020-06-01 12:10:32 +00:00
2020-06-17 11:43:55 +00:00
template < typename Derived , bool reversed , bool use_indexes >
2020-06-02 00:23:41 +00:00
void compareImpl ( const Derived & rhs , size_t rhs_row_num ,
2020-06-17 11:43:55 +00:00
PaddedPODArray < UInt64 > * row_indexes ,
PaddedPODArray < Int8 > & compare_results ,
int nan_direction_hint ) const ;
template < typename Derived >
void doCompareColumn ( const Derived & rhs , size_t rhs_row_num ,
PaddedPODArray < UInt64 > * row_indexes ,
PaddedPODArray < Int8 > & compare_results ,
int direction , int nan_direction_hint ) const ;
2021-02-26 04:50:04 +00:00
template < typename Derived >
bool hasEqualValuesImpl ( ) const ;
2021-06-07 13:50:27 +00:00
2021-06-07 15:34:22 +00:00
/// Template is to devirtualize calls to 'isDefaultAt' method.
2021-06-07 13:50:27 +00:00
template < typename Derived >
double getRatioOfDefaultRowsImpl ( double sample_ratio ) const ;
2021-06-07 15:34:22 +00:00
2023-02-09 18:06:15 +00:00
template < typename Derived >
UInt64 getNumberOfDefaultRowsImpl ( ) const ;
2021-06-07 15:34:22 +00:00
template < typename Derived >
void getIndicesOfNonDefaultRowsImpl ( Offsets & indices , size_t from , size_t limit ) const ;
2021-10-14 16:44:08 +00:00
2022-02-23 17:34:19 +00:00
template < typename Compare , typename Sort , typename PartialSort >
void getPermutationImpl ( size_t limit , Permutation & res , Compare compare ,
Sort full_sort , PartialSort partial_sort ) const ;
template < typename Compare , typename Equals , typename Sort , typename PartialSort >
void updatePermutationImpl ( size_t limit , Permutation & res , EqualRanges & equal_ranges , Compare compare , Equals equals ,
Sort full_sort , PartialSort partial_sort ) const ;
2010-03-12 18:25:35 +00:00
} ;
2017-12-13 01:27:53 +00:00
/// Shorthand for IColumn::Ptr.
using ColumnPtr = IColumn : : Ptr ;
2018-11-26 00:56:50 +00:00
/// Shorthand for IColumn::MutablePtr.
using MutableColumnPtr = IColumn : : MutablePtr ;
2017-12-13 01:27:53 +00:00
/// A sequence of ColumnPtr.
using Columns = std : : vector < ColumnPtr > ;
2017-12-14 04:25:22 +00:00
/// A sequence of MutableColumnPtr.
using MutableColumns = std : : vector < MutableColumnPtr > ;
2017-12-13 01:27:53 +00:00
/// A sequence of plain pointers to const columns.
/// NOTE(review): presumably non-owning, the columns must outlive the vector - confirm.
using ColumnRawPtrs = std : : vector < const IColumn * > ;
2010-03-12 18:25:35 +00:00
2021-02-07 01:41:31 +00:00
2018-03-21 19:39:14 +00:00
template < typename . . . Args >
struct IsMutableColumns ;
template < typename Arg , typename . . . Args >
struct IsMutableColumns < Arg , Args . . . >
{
static const bool value = std : : is_assignable < MutableColumnPtr & & , Arg > : : value & & IsMutableColumns < Args . . . > : : value ;
} ;
template < >
struct IsMutableColumns < > { static const bool value = true ; } ;
2019-06-27 18:50:20 +00:00
/// Tries to view `column` as the concrete column type `Type`.
/// Returns whatever typeid_cast yields for a pointer to `column`.
template <typename Type>
const Type * checkAndGetColumn(const IColumn & column)
{
    const IColumn * column_ptr = &column;
    return typeid_cast<const Type *>(column_ptr);
}
/// Pointer flavour of checkAndGetColumn: forwards `column` to typeid_cast and returns its result.
template <typename Type>
const Type * checkAndGetColumn(const IColumn * column)
{
    const Type * typed_column = typeid_cast<const Type *>(column);
    return typed_column;
}
/// True when checkAndGetColumn<Type> returns a non-null pointer for `column`.
template <typename Type>
bool checkColumn(const IColumn & column)
{
    return checkAndGetColumn<Type>(&column) != nullptr;
}
/// Pointer flavour of checkColumn: true when checkAndGetColumn<Type> returns a non-null pointer for `column`.
template <typename Type>
bool checkColumn(const IColumn * column)
{
    return checkAndGetColumn<Type>(column) != nullptr;
}
2019-07-01 11:44:19 +00:00
/// True if column is a ColumnConst instance. It's just syntactic sugar for a type check.
bool isColumnConst ( const IColumn & column ) ;
/// True if column is a ColumnNullable instance. It's just syntactic sugar for a type check.
bool isColumnNullable ( const IColumn & column ) ;
2019-06-27 19:28:52 +00:00
2010-03-12 18:25:35 +00:00
}