2011-08-09 15:57:33 +00:00
|
|
|
#pragma once
|
2010-03-01 16:59:51 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/BlockInfo.h>
|
|
|
|
#include <Core/ColumnWithTypeAndName.h>
|
|
|
|
#include <Core/ColumnsWithTypeAndName.h>
|
2021-04-10 23:33:54 +00:00
|
|
|
#include <Core/NamesAndTypes.h>
|
|
|
|
|
|
|
|
#include <initializer_list>
|
|
|
|
#include <list>
|
|
|
|
#include <map>
|
|
|
|
#include <set>
|
|
|
|
#include <vector>
|
2010-03-01 16:59:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-01-02 20:42:49 +00:00
|
|
|
/** Container for a set of columns for a bunch of rows in memory.
|
|
|
|
* This is the unit of data processing.
|
|
|
|
* Also contains metadata - data types of columns and their names
|
|
|
|
* (either original names from a table, or generated names during temporary calculations).
|
2018-10-13 14:33:43 +00:00
|
|
|
* Allows inserting and removing columns at arbitrary positions, and changing the order of columns.
|
2010-03-01 16:59:51 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
class Block
|
|
|
|
{
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
using Container = ColumnsWithTypeAndName;
|
2020-04-01 12:16:39 +00:00
|
|
|
using IndexByName = std::unordered_map<String, size_t>;
|
2016-08-04 21:40:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Container data;
|
|
|
|
IndexByName index_by_name;
|
2010-03-01 16:59:51 +00:00
|
|
|
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
BlockInfo info;
|
2015-01-03 03:18:49 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Block() = default;
|
|
|
|
Block(std::initializer_list<ColumnWithTypeAndName> il);
|
|
|
|
Block(const ColumnsWithTypeAndName & data_);
|
2014-06-25 00:17:51 +00:00
|
|
|
|
2017-04-30 13:50:16 +00:00
|
|
|
/// insert the column at the specified position
|
2017-04-01 07:20:54 +00:00
|
|
|
void insert(size_t position, const ColumnWithTypeAndName & elem);
|
|
|
|
void insert(size_t position, ColumnWithTypeAndName && elem);
|
2017-04-30 13:50:16 +00:00
|
|
|
/// insert the column to the end
|
2017-04-01 07:20:54 +00:00
|
|
|
void insert(const ColumnWithTypeAndName & elem);
|
|
|
|
void insert(ColumnWithTypeAndName && elem);
|
2017-04-30 13:50:16 +00:00
|
|
|
/// insert the column to the end, if there is no column with that name yet
|
2017-04-01 07:20:54 +00:00
|
|
|
void insertUnique(const ColumnWithTypeAndName & elem);
|
|
|
|
void insertUnique(ColumnWithTypeAndName && elem);
|
2017-04-30 13:50:16 +00:00
|
|
|
/// remove the column at the specified position
|
2017-04-01 07:20:54 +00:00
|
|
|
void erase(size_t position);
|
2018-11-14 15:23:00 +00:00
|
|
|
/// remove the columns at the specified positions
|
|
|
|
void erase(const std::set<size_t> & positions);
|
2017-04-30 13:50:16 +00:00
|
|
|
/// remove the column with the specified name
|
2017-04-01 07:20:54 +00:00
|
|
|
void erase(const String & name);
|
2010-03-01 16:59:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// References are invalidated after calling functions above.
|
2016-08-04 21:40:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Positional access to a column. Indexes `data` directly with operator[];
/// these overloads add no range check of their own (see safeGetByPosition for the
/// separately declared variant).
ColumnWithTypeAndName & getByPosition(size_t position)
{
    return data[position];
}

/// Read-only overload of the positional access above.
const ColumnWithTypeAndName & getByPosition(size_t position) const
{
    return data[position];
}
|
2010-03-01 16:59:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
ColumnWithTypeAndName & safeGetByPosition(size_t position);
|
|
|
|
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
|
2013-03-29 01:47:34 +00:00
|
|
|
|
2020-04-01 12:16:39 +00:00
|
|
|
/// Mutable overload of findByName(): performs the lookup through the const overload
/// and removes constness from the pointer it yields. Whatever that overload returns
/// is passed through unchanged.
/// NOTE(review): the const_cast presumes the returned pointer refers to a column owned
/// by this (non-const) block - confirm against the const overload's definition.
ColumnWithTypeAndName * findByName(const std::string & name)
{
    /// Viewing *this as const selects the const overload instead of recursing.
    const Block & self = *this;
    const ColumnWithTypeAndName * found = self.findByName(name);
    return const_cast<ColumnWithTypeAndName *>(found);
}
|
|
|
|
|
|
|
|
const ColumnWithTypeAndName* findByName(const std::string & name) const;
|
|
|
|
|
|
|
|
/// Mutable overload of getByName(): performs the lookup through the const overload
/// and removes constness from the reference it yields.
/// NOTE(review): the const_cast presumes the returned reference refers to a column owned
/// by this (non-const) block - confirm against the const overload's definition.
ColumnWithTypeAndName & getByName(const std::string & name)
{
    /// Viewing *this as const selects the const overload instead of recursing.
    const Block & self = *this;
    const ColumnWithTypeAndName & found = self.getByName(name);
    return const_cast<ColumnWithTypeAndName &>(found);
}
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
const ColumnWithTypeAndName & getByName(const std::string & name) const;
|
2010-03-01 16:59:51 +00:00
|
|
|
|
2017-12-09 12:23:09 +00:00
|
|
|
/// Iterator access to the columns; every accessor forwards to the matching member of `data`.

/// Mutable range.
Container::iterator begin()
{
    return data.begin();
}

Container::iterator end()
{
    return data.end();
}

/// Read-only range, selected when the block itself is const.
Container::const_iterator begin() const
{
    return data.begin();
}

Container::const_iterator end() const
{
    return data.end();
}

/// Explicitly read-only range, usable on a non-const block as well.
Container::const_iterator cbegin() const
{
    return data.cbegin();
}

Container::const_iterator cend() const
{
    return data.cend();
}
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool has(const std::string & name) const;
|
2011-12-12 06:15:34 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t getPositionByName(const std::string & name) const;
|
2011-08-12 20:39:42 +00:00
|
|
|
|
2017-12-18 02:37:08 +00:00
|
|
|
const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const;
|
2017-12-25 21:57:29 +00:00
|
|
|
NamesAndTypesList getNamesAndTypesList() const;
|
2017-12-18 02:37:08 +00:00
|
|
|
Names getNames() const;
|
2019-01-22 12:33:56 +00:00
|
|
|
DataTypes getDataTypes() const;
|
2011-11-06 02:29:13 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Returns the number of rows in the first column of the block that is not nullptr. If there are no columns, returns 0.
|
|
|
|
size_t rows() const;
|
2013-06-08 20:19:29 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Number of columns currently held, i.e. the size of `data`.
size_t columns() const
{
    return data.size();
}
|
2010-03-04 19:20:28 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Checks that every column in the block is not nullptr and that all columns have the same number of elements.
|
2019-09-23 19:26:04 +00:00
|
|
|
void checkNumberOfRows(bool allow_null_columns = false) const;
|
2017-01-02 20:42:49 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Approximate number of bytes in memory - for profiling and limits.
|
|
|
|
size_t bytes() const;
|
2012-05-17 19:15:53 +00:00
|
|
|
|
2017-07-13 14:47:46 +00:00
|
|
|
/// Approximate number of allocated bytes in memory - for profiling and limits.
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t allocatedBytes() const;
|
2017-07-13 14:47:46 +00:00
|
|
|
|
2018-12-11 17:43:12 +00:00
|
|
|
operator bool() const { return !!columns(); }
|
|
|
|
bool operator!() const { return !this->operator bool(); }
|
2011-08-14 00:49:30 +00:00
|
|
|
|
2017-04-30 13:50:16 +00:00
|
|
|
/** Get a list of column names separated by commas. */
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string dumpNames() const;
|
2011-09-19 03:34:23 +00:00
|
|
|
|
2021-03-29 00:35:41 +00:00
|
|
|
/** List of names, types and lengths of columns. Designed for debugging. */
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string dumpStructure() const;
|
2013-12-05 12:46:29 +00:00
|
|
|
|
2020-11-03 11:28:28 +00:00
|
|
|
/** List of column names and positions from index */
|
|
|
|
std::string dumpIndex() const;
|
|
|
|
|
2017-04-30 13:50:16 +00:00
|
|
|
/** Get the same block, but empty. */
|
2017-04-01 07:20:54 +00:00
|
|
|
Block cloneEmpty() const;
|
2014-06-25 00:17:51 +00:00
|
|
|
|
2018-06-15 05:33:39 +00:00
|
|
|
Columns getColumns() const;
|
|
|
|
void setColumns(const Columns & columns);
|
|
|
|
Block cloneWithColumns(const Columns & columns) const;
|
2018-07-20 12:30:04 +00:00
|
|
|
Block cloneWithoutColumns() const;
|
2021-01-26 20:49:52 +00:00
|
|
|
Block cloneWithCutColumns(size_t start, size_t length) const;
|
2018-06-15 05:33:39 +00:00
|
|
|
|
2017-12-14 20:58:18 +00:00
|
|
|
/** Get empty columns with the same types as in block. */
|
|
|
|
MutableColumns cloneEmptyColumns() const;
|
|
|
|
|
2018-03-20 10:58:16 +00:00
|
|
|
/** Get columns from block for mutation. Columns in block will be nullptr. */
|
2018-09-09 02:23:24 +00:00
|
|
|
MutableColumns mutateColumns();
|
2017-12-15 18:23:05 +00:00
|
|
|
|
2017-12-14 20:58:18 +00:00
|
|
|
/** Replace columns in a block */
|
|
|
|
void setColumns(MutableColumns && columns);
|
|
|
|
Block cloneWithColumns(MutableColumns && columns) const;
|
|
|
|
|
2017-04-30 13:50:16 +00:00
|
|
|
/** Get a block with columns that have been rearranged in the order of their names. */
|
2017-04-01 07:20:54 +00:00
|
|
|
Block sortColumns() const;
|
2015-02-13 20:37:30 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void clear();
|
|
|
|
void swap(Block & other) noexcept;
|
2016-07-05 16:23:37 +00:00
|
|
|
|
2017-04-13 19:30:51 +00:00
|
|
|
/** Updates SipHash of the Block, using update method of columns.
|
2017-07-13 14:47:46 +00:00
|
|
|
* The updated hash can then be used to differentiate blocks
|
2017-04-13 19:30:51 +00:00
|
|
|
* with same structure, but different data.
|
|
|
|
*/
|
2017-04-13 16:55:35 +00:00
|
|
|
void updateHash(SipHash & hash) const;
|
|
|
|
|
2016-08-04 21:40:20 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
void eraseImpl(size_t position);
|
|
|
|
void initializeIndexByName();
|
2020-10-09 07:41:28 +00:00
|
|
|
|
|
|
|
/// This is needed to allow function execution over data.
|
|
|
|
/// It is safe because functions do not change column names, so index is unaffected.
|
|
|
|
/// It is temporary.
|
2020-10-13 08:16:47 +00:00
|
|
|
friend class ExpressionActions;
|
2020-10-09 07:41:28 +00:00
|
|
|
friend class ActionsDAG;
|
2010-03-01 16:59:51 +00:00
|
|
|
};
|
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
using BlockPtr = std::shared_ptr<Block>;
|
2016-05-28 10:29:17 +00:00
|
|
|
using Blocks = std::vector<Block>;
|
|
|
|
using BlocksList = std::list<Block>;
|
2019-05-28 21:17:48 +00:00
|
|
|
using BlocksPtr = std::shared_ptr<Blocks>;
|
|
|
|
using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>;
|
2016-12-21 00:18:11 +00:00
|
|
|
|
2020-01-15 20:33:29 +00:00
|
|
|
/// Extends block with extra data in derived classes
|
|
|
|
struct ExtraBlock
|
|
|
|
{
|
|
|
|
Block block;
|
2020-06-16 20:13:18 +00:00
|
|
|
|
|
|
|
bool empty() const { return !block; }
|
2020-01-15 20:33:29 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
using ExtraBlockPtr = std::shared_ptr<ExtraBlock>;
|
|
|
|
|
2018-02-20 01:14:38 +00:00
|
|
|
/// Compare number of columns, data types, column types, column names, and values of constant columns.
|
2013-06-21 23:27:26 +00:00
|
|
|
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);
|
|
|
|
|
2018-02-20 01:14:38 +00:00
|
|
|
/// Throw exception when blocks are different.
|
|
|
|
void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description);
|
|
|
|
|
2021-04-20 11:55:23 +00:00
|
|
|
/// The actual header is compatible with the desired one if the blocks have equal structure, except for constants.
|
|
|
|
/// It is allowed for a column to be constant in the actual header but not in the desired one.
|
|
|
|
/// If both columns are constant, it is checked that they have the same value.
|
|
|
|
bool isCompatibleHeader(const Block & actual, const Block & desired);
|
|
|
|
void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description);
|
|
|
|
|
2018-02-20 01:14:38 +00:00
|
|
|
/// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns.
|
2016-12-21 00:18:11 +00:00
|
|
|
void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);
|
|
|
|
|
2010-03-01 16:59:51 +00:00
|
|
|
}
|