ClickHouse/src/Core/Block.h

188 lines
6.5 KiB
C++
Raw Normal View History

2011-08-09 15:57:33 +00:00
#pragma once
2010-03-01 16:59:51 +00:00
#include <vector>
#include <list>
#include <set>
2010-03-01 16:59:51 +00:00
#include <map>
2014-09-12 16:05:29 +00:00
#include <initializer_list>
2010-03-01 16:59:51 +00:00
#include <Core/BlockInfo.h>
#include <Core/NamesAndTypes.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/ColumnsWithTypeAndName.h>
2010-03-01 16:59:51 +00:00
namespace DB
{
/** Block is a container for a set of columns for a bunch of rows in memory.
  * This is the unit of data processing.
  * Also contains metadata - data types of columns and their names
  * (either original names from a table, or generated names during temporary calculations).
  * Allows inserting and removing columns at arbitrary positions, and changing the order of columns.
  */
/// Forward declaration only; Context is not referenced in the visible part of this header.
class Context;
2010-03-01 16:59:51 +00:00
class Block
{
private:
using Container = ColumnsWithTypeAndName;
2020-04-01 12:16:39 +00:00
using IndexByName = std::unordered_map<String, size_t>;
2016-08-04 21:40:20 +00:00
Container data;
IndexByName index_by_name;
2010-03-01 16:59:51 +00:00
public:
BlockInfo info;
Block() = default;
Block(std::initializer_list<ColumnWithTypeAndName> il);
Block(const ColumnsWithTypeAndName & data_);
2017-04-30 13:50:16 +00:00
/// insert the column at the specified position
void insert(size_t position, const ColumnWithTypeAndName & elem);
void insert(size_t position, ColumnWithTypeAndName && elem);
2017-04-30 13:50:16 +00:00
/// insert the column to the end
void insert(const ColumnWithTypeAndName & elem);
void insert(ColumnWithTypeAndName && elem);
2017-04-30 13:50:16 +00:00
/// insert the column to the end, if there is no column with that name yet
void insertUnique(const ColumnWithTypeAndName & elem);
void insertUnique(ColumnWithTypeAndName && elem);
2017-04-30 13:50:16 +00:00
/// remove the column at the specified position
void erase(size_t position);
/// remove the columns at the specified positions
void erase(const std::set<size_t> & positions);
2017-04-30 13:50:16 +00:00
/// remove the column with the specified name
void erase(const String & name);
2010-03-01 16:59:51 +00:00
/// References are invalidated after calling functions above.
2016-08-04 21:40:20 +00:00
ColumnWithTypeAndName & getByPosition(size_t position) { return data[position]; }
const ColumnWithTypeAndName & getByPosition(size_t position) const { return data[position]; }
2010-03-01 16:59:51 +00:00
ColumnWithTypeAndName & safeGetByPosition(size_t position);
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
2020-04-01 12:16:39 +00:00
ColumnWithTypeAndName* findByName(const std::string & name)
{
return const_cast<ColumnWithTypeAndName *>(
const_cast<const Block *>(this)->findByName(name));
}
const ColumnWithTypeAndName* findByName(const std::string & name) const;
ColumnWithTypeAndName & getByName(const std::string & name)
{
return const_cast<ColumnWithTypeAndName &>(
const_cast<const Block *>(this)->getByName(name));
}
const ColumnWithTypeAndName & getByName(const std::string & name) const;
2010-03-01 16:59:51 +00:00
Container::iterator begin() { return data.begin(); }
Container::iterator end() { return data.end(); }
Container::const_iterator begin() const { return data.begin(); }
Container::const_iterator end() const { return data.end(); }
Container::const_iterator cbegin() const { return data.cbegin(); }
Container::const_iterator cend() const { return data.cend(); }
bool has(const std::string & name) const;
2011-12-12 06:15:34 +00:00
size_t getPositionByName(const std::string & name) const;
2011-08-12 20:39:42 +00:00
const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const;
NamesAndTypesList getNamesAndTypesList() const;
Names getNames() const;
DataTypes getDataTypes() const;
2011-11-06 02:29:13 +00:00
/// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
size_t rows() const;
2013-06-08 20:19:29 +00:00
size_t columns() const { return data.size(); }
2010-03-04 19:20:28 +00:00
/// Checks that every column in block is not nullptr and has same number of elements.
void checkNumberOfRows(bool allow_null_columns = false) const;
/// Approximate number of bytes in memory - for profiling and limits.
size_t bytes() const;
2012-05-17 19:15:53 +00:00
2017-07-13 14:47:46 +00:00
/// Approximate number of allocated bytes in memory - for profiling and limits.
size_t allocatedBytes() const;
2017-07-13 14:47:46 +00:00
operator bool() const { return !!columns(); }
bool operator!() const { return !this->operator bool(); }
2011-08-14 00:49:30 +00:00
2017-04-30 13:50:16 +00:00
/** Get a list of column names separated by commas. */
std::string dumpNames() const;
2011-09-19 03:34:23 +00:00
2017-04-30 13:50:16 +00:00
/** List of names, types and lengths of columns. Designed for debugging. */
std::string dumpStructure() const;
2017-04-30 13:50:16 +00:00
/** Get the same block, but empty. */
Block cloneEmpty() const;
Columns getColumns() const;
void setColumns(const Columns & columns);
Block cloneWithColumns(const Columns & columns) const;
2018-07-20 12:30:04 +00:00
Block cloneWithoutColumns() const;
/** Get empty columns with the same types as in block. */
MutableColumns cloneEmptyColumns() const;
2018-03-20 10:58:16 +00:00
/** Get columns from block for mutation. Columns in block will be nullptr. */
2018-09-09 02:23:24 +00:00
MutableColumns mutateColumns();
/** Replace columns in a block */
void setColumns(MutableColumns && columns);
Block cloneWithColumns(MutableColumns && columns) const;
2017-04-30 13:50:16 +00:00
/** Get a block with columns that have been rearranged in the order of their names. */
Block sortColumns() const;
void clear();
void swap(Block & other) noexcept;
2017-04-13 19:30:51 +00:00
/** Updates SipHash of the Block, using update method of columns.
2017-07-13 14:47:46 +00:00
* Returns hash for block, that could be used to differentiate blocks
2017-04-13 19:30:51 +00:00
* with same structure, but different data.
*/
void updateHash(SipHash & hash) const;
2016-08-04 21:40:20 +00:00
private:
void eraseImpl(size_t position);
void initializeIndexByName();
/// This is needed to allow function execution over data.
/// It is safe because functions does not change column names, so index is unaffected.
/// It is temporary.
friend struct ExpressionAction;
friend class ActionsDAG;
2010-03-01 16:59:51 +00:00
};
/// Convenience aliases for collections of blocks.
/// Contiguous sequence of blocks.
using Blocks = std::vector<Block>;
/// Linked list of blocks.
using BlocksList = std::list<Block>;
/// Shared ownership of a Blocks vector.
using BlocksPtr = std::shared_ptr<Blocks>;
/// Shared ownership of a vector of BlocksPtr.
using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>;
2016-12-21 00:18:11 +00:00
2020-01-15 20:33:29 +00:00
/// Extends block with extra data in derived classes.
struct ExtraBlock
{
    Block block;

    /// True when the wrapped block has no columns (Block::operator! tests columns()).
    bool empty() const { return !block; }
};
/// Shared ownership of an ExtraBlock.
using ExtraBlockPtr = std::shared_ptr<ExtraBlock>;
/// Compare number of columns, data types, column types, column names, and values of constant columns.
/// Returns the outcome as a bool (presumably true when all of the above match; defined out of line).
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);
/// Throw exception when blocks are different.
/// `context_description` is presumably included in the exception message to identify the call site - confirm in Block.cpp.
void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description);
/// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns.
2016-12-21 00:18:11 +00:00
void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);
2010-03-01 16:59:51 +00:00
}