ClickHouse/src/Processors/Chunk.h

143 lines
4.5 KiB
C++
Raw Normal View History

2019-02-19 18:41:18 +00:00
#pragma once
#include <Columns/IColumn.h>
2019-04-05 12:29:17 +00:00
#include <unordered_map>
2019-02-19 18:41:18 +00:00
namespace DB
{
/// Base class for additional information that may be attached to a chunk.
/// Concrete kinds of info derive from it (e.g. ChunkMissingValues).
class ChunkInfo
{
public:
    ChunkInfo() = default;

    /// Virtual, so that an object of a derived class can be destroyed through a pointer to ChunkInfo.
    virtual ~ChunkInfo() = default;
};

/// Shared, read-only handle to a chunk info object.
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
2020-02-28 15:47:38 +00:00
/**
* Chunk is a list of columns with the same length.
* Chunk stores the number of rows in a separate field and maintains the invariant that all columns have equal length.
*
* Chunk has move-only semantics. It is more lightweight than Block because it doesn't store names, types and index_by_name.
*
* Chunk can have empty set of columns but non-zero number of rows. It helps when only the number of rows is needed.
* Chunk can have columns with zero number of rows. It may happen, for example, if all rows were filtered.
* Chunk is empty only if it has zero rows and empty list of columns.
*
* Any ChunkInfo may be attached to chunk.
* It may be useful if additional info per chunk is needed. For example, bucket number for aggregated data.
**/
2019-02-19 18:41:18 +00:00
class Chunk
{
public:
Chunk() = default;
2019-06-18 08:25:27 +00:00
Chunk(const Chunk & other) = delete;
2019-06-18 10:33:02 +00:00
Chunk(Chunk && other) noexcept
: columns(std::move(other.columns))
, num_rows(other.num_rows)
, chunk_info(std::move(other.chunk_info))
{
other.num_rows = 0;
}
2019-02-19 18:41:18 +00:00
Chunk(Columns columns_, UInt64 num_rows_);
Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
Chunk(MutableColumns columns_, UInt64 num_rows_);
Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
2019-06-18 08:25:27 +00:00
Chunk & operator=(const Chunk & other) = delete;
2019-06-18 10:33:02 +00:00
Chunk & operator=(Chunk && other) noexcept
{
columns = std::move(other.columns);
chunk_info = std::move(other.chunk_info);
num_rows = other.num_rows;
other.num_rows = 0;
return *this;
}
2019-02-19 18:41:18 +00:00
2019-06-18 08:25:27 +00:00
Chunk clone() const;
2019-06-19 18:30:02 +00:00
void swap(Chunk & other)
{
columns.swap(other.columns);
chunk_info.swap(other.chunk_info);
std::swap(num_rows, other.num_rows);
}
void clear()
{
num_rows = 0;
columns.clear();
chunk_info.reset();
}
2019-02-19 18:41:18 +00:00
const Columns & getColumns() const { return columns; }
void setColumns(Columns columns_, UInt64 num_rows_);
void setColumns(MutableColumns columns_, UInt64 num_rows_);
Columns detachColumns();
MutableColumns mutateColumns();
2019-02-26 18:40:08 +00:00
/** Get empty columns with the same types as in block. */
MutableColumns cloneEmptyColumns() const;
2019-02-19 18:41:18 +00:00
const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
bool hasChunkInfo() const { return chunk_info != nullptr; }
2019-02-19 18:41:18 +00:00
void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); }
UInt64 getNumRows() const { return num_rows; }
UInt64 getNumColumns() const { return columns.size(); }
2019-10-21 16:26:29 +00:00
bool hasRows() const { return num_rows > 0; }
bool hasColumns() const { return !columns.empty(); }
bool empty() const { return !hasRows() && !hasColumns(); }
operator bool() const { return !empty(); } /// NOLINT
2019-02-19 18:41:18 +00:00
2019-10-21 16:26:29 +00:00
void addColumn(ColumnPtr column);
2021-11-19 13:09:12 +00:00
void addColumn(size_t position, ColumnPtr column);
void erase(size_t position);
2019-02-19 18:41:18 +00:00
2019-03-04 14:56:09 +00:00
UInt64 bytes() const;
2019-02-26 18:40:08 +00:00
UInt64 allocatedBytes() const;
std::string dumpStructure() const;
void append(const Chunk & chunk);
2022-05-06 16:48:48 +00:00
2019-02-19 18:41:18 +00:00
private:
Columns columns;
UInt64 num_rows = 0;
ChunkInfoPtr chunk_info;
void checkNumRowsIsConsistent();
};
using Chunks = std::vector<Chunk>;
2020-02-28 15:47:38 +00:00
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
2019-02-19 18:41:18 +00:00
class ChunkMissingValues : public ChunkInfo
{
public:
    /// One bit per row of a single column.
    using RowsBitMask = std::vector<bool>;

    const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
    void setBit(size_t column_idx, size_t row_idx);

    /// Number of columns that have a mask stored.
    size_t size() const { return rows_mask_by_column_id.size(); }
    bool empty() const { return rows_mask_by_column_id.empty(); }
    void clear() { rows_mask_by_column_id.clear(); }

private:
    using RowsMaskByColumnId = std::unordered_map<size_t, RowsBitMask>;

    /// If rows_mask_by_column_id[column_id][row_id] is true, the related value in Block should be replaced with the column default.
    /// It may contain fewer columns and rows than the related block.
    RowsMaskByColumnId rows_mask_by_column_id;
};
2021-12-08 15:29:00 +00:00
/// Converts all columns to full serialization in chunk.
/// It's needed, when you have to access to the internals of the column,
/// or when you need to perform operation with two columns
/// and their structure must be equal (e.g. compareAt).
2021-04-06 15:59:03 +00:00
void convertToFullIfSparse(Chunk & chunk);
2019-02-19 18:41:18 +00:00
}