ClickHouse/dbms/src/Core/Block.h

165 lines
5.4 KiB
C++
Raw Normal View History

2011-08-09 15:57:33 +00:00
#pragma once
2010-03-01 16:59:51 +00:00
#include <vector>
#include <list>
#include <map>
2014-09-12 16:05:29 +00:00
#include <initializer_list>
2010-03-01 16:59:51 +00:00
#include <Common/Exception.h>
#include <Core/BlockInfo.h>
#include <Core/NamesAndTypes.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/ColumnNumbers.h>
#include <Common/Exception.h>
2010-03-01 16:59:51 +00:00
namespace DB
{
/** Container for set of columns for bunch of rows in memory.
* This is unit of data processing.
* Also contains metadata - data types of columns and their names
* (either original names from a table, or generated names during temporary calculations).
* Allows to insert, remove columns in arbitary position, to change order of columns.
2010-03-01 16:59:51 +00:00
*/
class Context;
2010-03-01 16:59:51 +00:00
class Block
{
private:
using Container = ColumnsWithTypeAndName;
using IndexByName = std::map<String, size_t>;
2016-08-04 21:40:20 +00:00
Container data;
IndexByName index_by_name;
2010-03-01 16:59:51 +00:00
public:
BlockInfo info;
Block() = default;
Block(std::initializer_list<ColumnWithTypeAndName> il);
Block(const ColumnsWithTypeAndName & data_);
2017-04-30 13:50:16 +00:00
/// insert the column at the specified position
void insert(size_t position, const ColumnWithTypeAndName & elem);
void insert(size_t position, ColumnWithTypeAndName && elem);
2017-04-30 13:50:16 +00:00
/// insert the column to the end
void insert(const ColumnWithTypeAndName & elem);
void insert(ColumnWithTypeAndName && elem);
2017-04-30 13:50:16 +00:00
/// insert the column to the end, if there is no column with that name yet
void insertUnique(const ColumnWithTypeAndName & elem);
void insertUnique(ColumnWithTypeAndName && elem);
2017-04-30 13:50:16 +00:00
/// remove the column at the specified position
void erase(size_t position);
2017-04-30 13:50:16 +00:00
/// remove the column with the specified name
void erase(const String & name);
2017-04-30 13:50:16 +00:00
/// Adds missing columns to the block with default values
void addDefaults(const NamesAndTypesList & required_columns);
2010-03-01 16:59:51 +00:00
/// References are invalidated after calling functions above.
2016-08-04 21:40:20 +00:00
ColumnWithTypeAndName & getByPosition(size_t position) { return data[position]; }
const ColumnWithTypeAndName & getByPosition(size_t position) const { return data[position]; }
2010-03-01 16:59:51 +00:00
ColumnWithTypeAndName & safeGetByPosition(size_t position);
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
ColumnWithTypeAndName & getByName(const std::string & name);
const ColumnWithTypeAndName & getByName(const std::string & name) const;
2010-03-01 16:59:51 +00:00
bool has(const std::string & name) const;
2011-12-12 06:15:34 +00:00
size_t getPositionByName(const std::string & name) const;
2011-08-12 20:39:42 +00:00
ColumnsWithTypeAndName getColumns() const;
NamesAndTypesList getColumnsList() const;
2011-11-06 02:29:13 +00:00
/// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
size_t rows() const;
2013-06-08 20:19:29 +00:00
size_t columns() const { return data.size(); }
2010-03-04 19:20:28 +00:00
/// Checks that every column in block is not nullptr and has same number of elements.
void checkNumberOfRows() const;
/// Approximate number of bytes in memory - for profiling and limits.
size_t bytes() const;
2012-05-17 19:15:53 +00:00
2017-07-13 14:47:46 +00:00
/// Approximate number of allocated bytes in memory - for profiling and limits.
size_t allocatedBytes() const;
2017-07-13 14:47:46 +00:00
operator bool() const { return !data.empty(); }
bool operator!() const { return data.empty(); }
2011-08-14 00:49:30 +00:00
2017-04-30 13:50:16 +00:00
/** Get a list of column names separated by commas. */
std::string dumpNames() const;
2011-09-19 03:34:23 +00:00
2017-04-30 13:50:16 +00:00
/** List of names, types and lengths of columns. Designed for debugging. */
std::string dumpStructure() const;
2017-04-30 13:50:16 +00:00
/** Get the same block, but empty. */
Block cloneEmpty() const;
2017-04-30 13:50:16 +00:00
/** Get a block with columns that have been rearranged in the order of their names. */
Block sortColumns() const;
2017-04-30 13:50:16 +00:00
/** Replaces the offset columns within the nested tables by one common for the table.
* Throws an exception if these offsets suddenly turn out to be different.
*/
void optimizeNestedArraysOffsets();
2017-04-30 13:50:16 +00:00
/** The same, only without changing the offsets. */
void checkNestedArraysOffsets() const;
void clear();
void swap(Block & other) noexcept;
/** Some column implementations (ColumnArray) may have shared parts between different columns
* (common array sizes of elements of nested data structures).
* Before doing mutating operations on such columns, you must unshare that parts.
* Also unsharing columns, if whole columns are shared_ptrs pointing to same instances.
*/
void unshareColumns();
2017-04-13 19:30:51 +00:00
/** Updates SipHash of the Block, using update method of columns.
2017-07-13 14:47:46 +00:00
* Returns hash for block, that could be used to differentiate blocks
2017-04-13 19:30:51 +00:00
* with same structure, but different data.
*/
void updateHash(SipHash & hash) const;
2016-08-04 21:40:20 +00:00
private:
void eraseImpl(size_t position);
void initializeIndexByName();
2010-03-01 16:59:51 +00:00
};
/// A sequence of blocks held in a std::vector.
using Blocks = std::vector<Block>;
/// A sequence of blocks held in a std::list (needs <list> to be included by this header).
using BlocksList = std::list<Block>;
2012-07-17 20:04:39 +00:00
2016-12-21 00:18:11 +00:00
2017-04-30 13:50:16 +00:00
/** Compares the column types of two blocks.
  * The order of the columns matters; the column names do not.
  * NOTE(review): presumably returns true when the structures are equal - the definition is not in this header.
  */
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);
2016-12-21 00:18:11 +00:00
/** Calculates the difference in structure between two blocks and writes a description
  * into the output strings `out_lhs_diff` and `out_rhs_diff`.
  * NOTE(review): presumably each output string describes the corresponding side (lhs / rhs) - confirm against the definition.
  */
void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);
2016-12-20 22:22:23 +00:00
2017-04-30 13:50:16 +00:00
/** Additional data to the blocks. They are only needed for a query
  * DESCRIBE TABLE with Distributed tables.
  */
struct BlockExtraInfo
{
    /// All members carry in-class initializers or are default-constructible strings,
    /// so the compiler-generated constructor yields the same state as an empty body.
    BlockExtraInfo() = default;

    /// The object converts to true only when `is_valid` has been set.
    operator bool() const { return is_valid; }
    bool operator!() const { return !is_valid; }

    std::string host;
    std::string resolved_address;
    std::string user;
    UInt16 port = 0;

    /// False by default; the conversion operators above report this flag.
    bool is_valid = false;
};
2010-03-01 16:59:51 +00:00
}