2019-02-19 18:41:18 +00:00
|
|
|
#include <Processors/Chunk.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2019-04-08 10:37:09 +00:00
|
|
|
#include <IO/Operators.h>
|
2021-04-06 15:59:03 +00:00
|
|
|
#include <Columns/ColumnSparse.h>
|
2019-02-19 18:41:18 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-02-22 17:45:56 +00:00
|
|
|
/// Error codes used in this file; they are only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int POSITION_OUT_OF_BOUND;
}
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Creates a chunk that takes ownership of the given columns.
/// Every column must contain exactly num_rows_ rows, otherwise
/// checkNumRowsIsConsistent() throws LOGICAL_ERROR.
Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_)
    : columns(std::move(columns_))
    , num_rows(num_rows_)
{
    checkNumRowsIsConsistent();
}
|
|
|
|
|
|
|
|
/// Same as the two-argument constructor, but additionally stores chunk_info_ in the chunk.
/// Throws LOGICAL_ERROR (via checkNumRowsIsConsistent()) if some column size differs from num_rows_.
Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
    : columns(std::move(columns_))
    , num_rows(num_rows_)
    , chunk_info(std::move(chunk_info_))
{
    checkNumRowsIsConsistent();
}
|
|
|
|
|
|
|
|
/// Converts a list of mutable columns into a list of immutable ones.
/// Each element is moved out of mut_columns, so the source pointers are left in a moved-from state.
static Columns unmuteColumns(MutableColumns && mut_columns)
{
    const size_t count = mut_columns.size();

    Columns immutable_columns;
    immutable_columns.reserve(count);

    for (size_t i = 0; i < count; ++i)
        immutable_columns.emplace_back(std::move(mut_columns[i]));

    return immutable_columns;
}
|
|
|
|
|
|
|
|
/// Creates a chunk from mutable columns; they are converted to immutable ones by unmuteColumns().
/// Throws LOGICAL_ERROR (via checkNumRowsIsConsistent()) if some column size differs from num_rows_.
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
    : columns(unmuteColumns(std::move(columns_)))
    , num_rows(num_rows_)
{
    checkNumRowsIsConsistent();
}
|
|
|
|
|
|
|
|
/// Creates a chunk from mutable columns (converted by unmuteColumns()) and stores chunk_info_ in it.
/// Throws LOGICAL_ERROR (via checkNumRowsIsConsistent()) if some column size differs from num_rows_.
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
    : columns(unmuteColumns(std::move(columns_)))
    , num_rows(num_rows_)
    , chunk_info(std::move(chunk_info_))
{
    checkNumRowsIsConsistent();
}
|
|
|
|
|
2019-06-18 08:25:27 +00:00
|
|
|
/// Returns a new chunk built from this chunk's columns, row count and chunk_info.
/// NOTE(review): the columns are passed as a Columns value, so presumably the column
/// pointers are shared with the original rather than deep-copied - confirm against Chunk.h.
Chunk Chunk::clone() const
{
    Chunk copy(getColumns(), getNumRows(), chunk_info);
    return copy;
}
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Replaces the chunk contents with the given columns and row count.
/// Throws LOGICAL_ERROR (via checkNumRowsIsConsistent()) if some column size differs from num_rows_;
/// note that the members have already been overwritten at that point.
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
{
    num_rows = num_rows_;
    columns = std::move(columns_);

    checkNumRowsIsConsistent();
}
|
|
|
|
|
|
|
|
/// Replaces the chunk contents with the given mutable columns (converted by unmuteColumns()) and row count.
/// Throws LOGICAL_ERROR (via checkNumRowsIsConsistent()) if some column size differs from num_rows_;
/// note that the members have already been overwritten at that point.
void Chunk::setColumns(MutableColumns columns_, UInt64 num_rows_)
{
    num_rows = num_rows_;
    columns = unmuteColumns(std::move(columns_));

    checkNumRowsIsConsistent();
}
|
|
|
|
|
|
|
|
void Chunk::checkNumRowsIsConsistent()
|
|
|
|
{
|
2020-07-28 15:10:36 +00:00
|
|
|
for (size_t i = 0; i < columns.size(); ++i)
|
|
|
|
{
|
|
|
|
auto & column = columns[i];
|
2019-02-19 18:41:18 +00:00
|
|
|
if (column->size() != num_rows)
|
2020-07-28 15:10:36 +00:00
|
|
|
throw Exception("Invalid number of rows in Chunk column " + column->getName()+ " position " + toString(i) + ": expected " +
|
2019-02-19 18:41:18 +00:00
|
|
|
toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
|
2020-07-28 15:10:36 +00:00
|
|
|
}
|
2019-02-19 18:41:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Moves all columns out of the chunk as mutable columns (via IColumn::mutate).
/// Afterwards the chunk has no columns and num_rows is reset to 0.
MutableColumns Chunk::mutateColumns()
{
    MutableColumns result;
    result.reserve(columns.size());

    for (auto & column : columns)
        result.emplace_back(IColumn::mutate(std::move(column)));

    /// The column pointers were moved from above; drop the leftovers and reset the row count.
    columns.clear();
    num_rows = 0;

    return result;
}
|
|
|
|
|
2019-02-26 18:40:08 +00:00
|
|
|
/// Returns one mutable column per column of the chunk, each produced by cloneEmpty().
/// The chunk itself is not modified.
MutableColumns Chunk::cloneEmptyColumns() const
{
    MutableColumns result;
    result.reserve(columns.size());

    for (const auto & column : columns)
        result.emplace_back(column->cloneEmpty());

    return result;
}
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Moves the columns out of the chunk and returns them; num_rows is reset to 0.
/// Callers that need the row count must read it before calling this method.
Columns Chunk::detachColumns()
{
    num_rows = 0;

    Columns detached(std::move(columns));
    return detached;
}
|
|
|
|
|
2019-10-21 16:26:29 +00:00
|
|
|
/// Appends a column to the end of the chunk.
/// If the chunk is empty() the column defines the row count; otherwise the column must
/// have exactly num_rows rows, or LOGICAL_ERROR is thrown.
void Chunk::addColumn(ColumnPtr column)
{
    if (empty())
    {
        num_rows = column->size();
    }
    else
    {
        const size_t incoming_rows = column->size();
        if (incoming_rows != num_rows)
        {
            throw Exception(
                "Invalid number of rows in Chunk column " + column->getName() + ": expected " + toString(num_rows)
                    + ", got " + toString(incoming_rows),
                ErrorCodes::LOGICAL_ERROR);
        }
    }

    columns.emplace_back(std::move(column));
}
|
|
|
|
|
2021-11-19 13:09:12 +00:00
|
|
|
void Chunk::addColumn(size_t position, ColumnPtr column)
|
|
|
|
{
|
|
|
|
if (position >= columns.size())
|
|
|
|
throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND,
|
|
|
|
"Position {} out of bound in Chunk::addColumn(), max position = {}",
|
|
|
|
position, columns.size() - 1);
|
|
|
|
if (empty())
|
|
|
|
num_rows = column->size();
|
|
|
|
else if (column->size() != num_rows)
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
|
|
"Invalid number of rows in Chunk column {}: expected {}, got {}",
|
|
|
|
column->getName(), num_rows, column->size());
|
|
|
|
|
|
|
|
columns.emplace(columns.begin() + position, std::move(column));
|
|
|
|
}
|
|
|
|
|
2019-02-22 17:45:56 +00:00
|
|
|
void Chunk::erase(size_t position)
|
|
|
|
{
|
|
|
|
if (columns.empty())
|
|
|
|
throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
|
|
|
|
|
|
|
|
if (position >= columns.size())
|
|
|
|
throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = "
|
|
|
|
+ toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
|
|
|
|
|
|
|
|
columns.erase(columns.begin() + position);
|
|
|
|
}
|
|
|
|
|
2019-03-04 14:56:09 +00:00
|
|
|
/// Returns the sum of byteSize() over all columns of the chunk (0 if there are no columns).
UInt64 Chunk::bytes() const
{
    UInt64 total = 0;

    for (size_t i = 0; i < columns.size(); ++i)
        total += columns[i]->byteSize();

    return total;
}
|
|
|
|
|
2019-02-26 18:40:08 +00:00
|
|
|
/// Returns the sum of allocatedBytes() over all columns of the chunk (0 if there are no columns).
UInt64 Chunk::allocatedBytes() const
{
    UInt64 total = 0;

    for (size_t i = 0; i < columns.size(); ++i)
        total += columns[i]->allocatedBytes();

    return total;
}
|
|
|
|
|
2019-04-08 10:37:09 +00:00
|
|
|
std::string Chunk::dumpStructure() const
|
|
|
|
{
|
|
|
|
WriteBufferFromOwnString out;
|
2020-04-22 06:34:20 +00:00
|
|
|
for (const auto & column : columns)
|
2019-04-08 10:37:09 +00:00
|
|
|
out << ' ' << column->dumpStructure();
|
|
|
|
|
|
|
|
return out.str();
|
|
|
|
}
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
|
|
|
|
void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx)
|
|
|
|
{
|
|
|
|
RowsBitMask & mask = rows_mask_by_column_id[column_idx];
|
|
|
|
mask.resize(row_idx + 1);
|
|
|
|
mask[row_idx] = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the bitmask stored for column_idx, or a reference to a shared
/// default-constructed mask if nothing was recorded for that column.
const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(size_t column_idx) const
{
    /// Function-local static so that a reference can be returned for absent columns.
    static RowsBitMask none;

    const auto found = rows_mask_by_column_id.find(column_idx);
    if (found == rows_mask_by_column_id.end())
        return none;

    return found->second;
}
|
|
|
|
|
2021-04-06 15:59:03 +00:00
|
|
|
/// Replaces every column of the chunk with the result of recursiveRemoveSparse(),
/// keeping the row count unchanged.
void convertToFullIfSparse(Chunk & chunk)
{
    /// The row count must be read first: detachColumns() resets it to 0.
    const size_t rows = chunk.getNumRows();

    Columns converted = chunk.detachColumns();
    for (size_t i = 0; i < converted.size(); ++i)
        converted[i] = recursiveRemoveSparse(converted[i]);

    chunk.setColumns(std::move(converted), rows);
}
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
}
|