ClickHouse/src/Columns/ColumnTuple.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

585 lines
17 KiB
C++
Raw Normal View History

#include <Columns/ColumnTuple.h>
2022-01-30 19:49:48 +00:00
#include <base/sort.h>
2020-06-01 12:10:32 +00:00
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnCompressed.h>
#include <Core/Field.h>
2021-10-08 14:03:54 +00:00
#include <Processors/Transforms/ColumnGathererTransform.h>
2018-06-05 19:46:49 +00:00
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
2021-10-29 17:21:02 +00:00
#include <DataTypes/Serializations/SerializationInfoTuple.h>
2020-11-10 20:25:29 +00:00
namespace DB
{
/// Error codes thrown from this file. They are only declared here (`extern`).
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int NOT_IMPLEMENTED;
    extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
    extern const int LOGICAL_ERROR;
}
std::string ColumnTuple::getName() const
{
2018-06-05 19:46:49 +00:00
WriteBufferFromOwnString res;
res << "Tuple(";
bool is_first = true;
for (const auto & column : columns)
{
if (!is_first)
res << ", ";
is_first = false;
res << column->getName();
}
res << ")";
return res.str();
}
2018-03-20 14:17:09 +00:00
/// Takes ownership of the given element columns.
/// Throws ILLEGAL_COLUMN if any of them is a ColumnConst.
ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
{
    const size_t count = mutable_columns.size();
    columns.reserve(count);

    for (size_t i = 0; i < count; ++i)
    {
        auto & element = mutable_columns[i];

        const bool is_const = isColumnConst(*element);
        if (is_const)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");

        columns.push_back(std::move(element));
    }
}
2018-03-21 19:39:14 +00:00
/// Creates a tuple column from a list of (immutable) element columns.
/// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
{
    for (size_t i = 0, count = columns.size(); i < count; ++i)
    {
        if (isColumnConst(*columns[i]))
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
    }

    /// Start from an empty tuple, then fill its element list from the argument.
    auto result = ColumnTuple::create(MutableColumns());
    result->columns.assign(columns.begin(), columns.end());
    return result;
}
/// Creates a tuple column from an already wrapped element list.
/// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
{
    for (size_t i = 0, count = columns.size(); i < count; ++i)
    {
        if (isColumnConst(*columns[i]))
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
    }

    /// Start from an empty tuple, then copy the element list into it.
    auto result = ColumnTuple::create(MutableColumns());
    result->columns = columns;
    return result;
}
/// Returns a new tuple column whose elements are `cloneEmpty()` copies of this tuple's elements.
MutableColumnPtr ColumnTuple::cloneEmpty() const
{
    MutableColumns cloned;
    cloned.reserve(columns.size());

    for (const auto & element : columns)
        cloned.push_back(element->cloneEmpty());

    return ColumnTuple::create(std::move(cloned));
}
2019-08-28 15:44:18 +00:00
/// Returns a new tuple column whose elements are `cloneResized(new_size)` copies of this tuple's elements.
MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
{
    MutableColumns cloned;
    cloned.reserve(columns.size());

    for (const auto & element : columns)
        cloned.push_back(element->cloneResized(new_size));

    return ColumnTuple::create(std::move(cloned));
}
/// Returns row `n` as a Field; delegates to get().
Field ColumnTuple::operator[](size_t n) const
{
    Field value;
    get(n, value);
    return value;
}
void ColumnTuple::get(size_t n, Field & res) const
{
const size_t tuple_size = columns.size();
res = Tuple();
2022-09-10 03:04:40 +00:00
Tuple & res_tuple = res.get<Tuple &>();
res_tuple.reserve(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
res_tuple.push_back((*columns[i])[n]);
}
/// Row `n` is default only if it is default in every element column
/// (so a tuple with no elements reports true).
bool ColumnTuple::isDefaultAt(size_t n) const
{
    return std::all_of(columns.begin(), columns.end(),
        [n](const auto & element) { return element->isDefaultAt(n); });
}
2017-12-01 21:13:25 +00:00
/// Not supported for tuples: always throws NOT_IMPLEMENTED.
StringRef ColumnTuple::getDataAt(size_t /* n */) const
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Method getDataAt is not supported for {}",
        getName());
}
2017-12-01 21:13:25 +00:00
/// Not supported for tuples: always throws NOT_IMPLEMENTED.
void ColumnTuple::insertData(const char * /* pos */, size_t /* length */)
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Method insertData is not supported for {}",
        getName());
}
void ColumnTuple::insert(const Field & x)
{
2022-09-10 03:04:40 +00:00
const auto & tuple = x.get<const Tuple &>();
const size_t tuple_size = columns.size();
if (tuple.size() != tuple_size)
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insert(tuple[i]);
}
void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
{
const ColumnTuple & src = assert_cast<const ColumnTuple &>(src_);
const size_t tuple_size = columns.size();
if (src.columns.size() != tuple_size)
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insertFrom(*src.columns[i], n);
}
void ColumnTuple::insertDefault()
{
for (auto & column : columns)
column->insertDefault();
}
void ColumnTuple::popBack(size_t n)
{
for (auto & column : columns)
column->popBack(n);
}
/// Serializes row `n` by serializing each element column in order and
/// returns a reference covering all the pieces together.
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    StringRef whole(begin, 0);

    for (size_t i = 0, count = columns.size(); i != count; ++i)
    {
        const auto piece = columns[i]->serializeValueIntoArena(n, arena, begin);

        /// Recompute the start from the latest piece: it is assumed to directly follow
        /// the bytes accumulated so far.
        /// NOTE(review): presumably needed because the arena may relocate the block between calls — confirm.
        whole.data = piece.data - whole.size;
        whole.size += piece.size;
    }

    return whole;
}
const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos)
{
for (auto & column : columns)
pos = column->deserializeAndInsertFromArena(pos);
return pos;
}
const char * ColumnTuple::skipSerializedInArena(const char * pos) const
{
2021-02-21 12:50:55 +00:00
for (const auto & column : columns)
pos = column->skipSerializedInArena(pos);
return pos;
}
void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
{
2020-04-22 06:22:14 +00:00
for (const auto & column : columns)
column->updateHashWithValue(n, hash);
}
/// Updates `hash` with every element column in order.
/// Throws LOGICAL_ERROR if the hash does not hold exactly one entry per row of this column.
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
{
    const auto rows = size();
    const auto hash_rows = hash.getData().size();

    if (hash_rows != rows)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
                        "column size is {}, hash size is {}", std::to_string(rows), std::to_string(hash_rows));

    for (size_t i = 0, count = columns.size(); i != count; ++i)
        columns[i]->updateWeakHash32(hash);
}
/// Forwards `updateHashFast(hash)` to every element column, in element order.
void ColumnTuple::updateHashFast(SipHash & hash) const
{
    for (size_t i = 0, count = columns.size(); i != count; ++i)
        columns[i]->updateHashFast(hash);
}
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insertRangeFrom(
*assert_cast<const ColumnTuple &>(src).columns[i],
start, length);
}
2018-03-20 14:17:09 +00:00
/// Applies `filter(filt, result_size_hint)` to every element column and
/// returns a new tuple column built from the results.
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) const
{
    Columns filtered;
    filtered.reserve(columns.size());

    for (const auto & element : columns)
        filtered.push_back(element->filter(filt, result_size_hint));

    return ColumnTuple::create(filtered);
}
void ColumnTuple::expand(const Filter & mask, bool inverted)
{
for (auto & column : columns)
column->expand(mask, inverted);
}
/// Applies `permute(perm, limit)` to every element column and
/// returns a new tuple column built from the results.
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
{
    Columns permuted;
    permuted.reserve(columns.size());

    for (const auto & element : columns)
        permuted.push_back(element->permute(perm, limit));

    return ColumnTuple::create(permuted);
}
/// Applies `index(indexes, limit)` to every element column and
/// returns a new tuple column built from the results.
ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
{
    Columns indexed;
    indexed.reserve(columns.size());

    for (const auto & element : columns)
        indexed.push_back(element->index(indexes, limit));

    return ColumnTuple::create(indexed);
}
2018-03-20 14:17:09 +00:00
/// Applies `replicate(offsets)` to every element column and
/// returns a new tuple column built from the results.
ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
{
    Columns replicated;
    replicated.reserve(columns.size());

    for (const auto & element : columns)
        replicated.push_back(element->replicate(offsets));

    return ColumnTuple::create(replicated);
}
/// Scatters every element column with the same selector, then regroups the pieces:
/// output column `k` is a tuple made of the `k`-th scattered piece of each element.
MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & selector) const
{
    const size_t tuple_size = columns.size();

    /// per_element[e][k] is the k-th piece of element column e.
    std::vector<MutableColumns> per_element(tuple_size);
    for (size_t e = 0; e < tuple_size; ++e)
        per_element[e] = columns[e]->scatter(num_columns, selector);

    MutableColumns res(num_columns);
    for (size_t k = 0; k < num_columns; ++k)
    {
        MutableColumns pieces(tuple_size);
        for (size_t e = 0; e < tuple_size; ++e)
            pieces[e] = std::move(per_element[e][k]);

        res[k] = ColumnTuple::create(std::move(pieces));
    }

    return res;
}
2020-10-29 11:24:01 +00:00
int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
2020-10-29 11:24:01 +00:00
{
int res;
if (collator && columns[i]->isCollationSupported())
res = columns[i]->compareAtWithCollation(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint, *collator);
else
res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint);
if (res)
return res;
2020-10-29 11:24:01 +00:00
}
return 0;
}
2020-10-29 11:24:01 +00:00
/// Compares row `n` of this column with row `m` of `rhs`; delegates to compareAtImpl
/// without passing a collator argument.
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    const int order = compareAtImpl(n, m, rhs, nan_direction_hint);
    return order;
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
2020-06-17 11:43:55 +00:00
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
2020-06-01 12:10:32 +00:00
{
2020-06-17 11:43:55 +00:00
return doCompareColumn<ColumnTuple>(assert_cast<const ColumnTuple &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
2020-06-01 12:10:32 +00:00
}
2020-10-29 11:24:01 +00:00
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
2021-02-26 04:50:04 +00:00
/// Delegates to the shared helper hasEqualValuesImpl, instantiated for ColumnTuple.
bool ColumnTuple::hasEqualValues() const
{
    const bool all_equal = hasEqualValuesImpl<ColumnTuple>();
    return all_equal;
}
2020-11-03 14:25:52 +00:00
/// Row-ordering functor over a set of tuple element columns.
/// Rows are compared element by element; the first element that differs decides.
/// With `positive == true` a row that compares lower is "less"; with `positive == false`
/// the order is reversed. Rows equal in every element are never "less".
template <bool positive>
struct ColumnTuple::Less
{
    TupleColumns columns;       /// Element columns to compare (held by value).
    int nan_direction_hint;     /// Passed through to the element comparisons.
    const Collator * collator;  /// Optional; used only by elements that support collation.

    Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr)
        : columns(columns_)
        , nan_direction_hint(nan_direction_hint_)
        , collator(collator_)
    {
    }

    bool operator() (size_t a, size_t b) const
    {
        for (const auto & column : columns)
        {
            const bool use_collation = collator && column->isCollationSupported();

            const int order = use_collation
                ? column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator)
                : column->compareAt(a, b, *column, nan_direction_hint);

            /// Equal in this element: the next element decides.
            if (order == 0)
                continue;

            return (order < 0) ? positive : !positive;
        }

        return false;
    }
};
/// Fills `res` with the identity permutation over all rows and then refines it with
/// updatePermutationImpl, starting from one equal range covering every row.
/// A `limit` that is not smaller than the row count is replaced by 0.
void ColumnTuple::getPermutationImpl(
    IColumn::PermutationSortDirection direction,
    IColumn::PermutationSortStability stability,
    size_t limit,
    int nan_direction_hint,
    Permutation & res,
    const Collator * collator) const
{
    const size_t rows = size();

    res.resize(rows);
    for (size_t row = 0; row != rows; ++row)
        res[row] = row;

    if (rows <= limit)
        limit = 0;

    EqualRanges ranges;
    ranges.emplace_back(0, rows);

    updatePermutationImpl(direction, stability, limit, nan_direction_hint, res, ranges, collator);
}
/// Refines the permutation `res` within `equal_ranges` by applying each element column in order.
/// Each element column updates both the permutation and the equal ranges; processing stops
/// as soon as no equal ranges remain.
void ColumnTuple::updatePermutationImpl(
    IColumn::PermutationSortDirection direction,
    IColumn::PermutationSortStability stability,
    size_t limit,
    int nan_direction_hint,
    IColumn::Permutation & res,
    EqualRanges & equal_ranges,
    const Collator * collator) const
{
    if (equal_ranges.empty())
        return;

    for (size_t i = 0, count = columns.size(); i != count; ++i)
    {
        const auto & column = columns[i];

        /// With a non-zero limit, drop trailing ranges that start at or beyond the limit.
        while (!equal_ranges.empty() && limit != 0 && limit <= equal_ranges.back().first)
            equal_ranges.pop_back();

        const bool use_collation = collator && column->isCollationSupported();
        if (use_collation)
            column->updatePermutationWithCollation(*collator, direction, stability, limit, nan_direction_hint, res, equal_ranges);
        else
            column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges);

        if (equal_ranges.empty())
            break;
    }
}
/// Computes a sorting permutation without collation; delegates to getPermutationImpl
/// with a null collator.
void ColumnTuple::getPermutation(
    IColumn::PermutationSortDirection direction,
    IColumn::PermutationSortStability stability,
    size_t limit,
    int nan_direction_hint,
    Permutation & res) const
{
    getPermutationImpl(direction, stability, limit, nan_direction_hint, res, nullptr);
}
/// Refines an existing permutation without passing a collator; delegates to updatePermutationImpl.
void ColumnTuple::updatePermutation(
    IColumn::PermutationSortDirection direction,
    IColumn::PermutationSortStability stability,
    size_t limit,
    int nan_direction_hint,
    IColumn::Permutation & res,
    EqualRanges & equal_ranges) const
{
    updatePermutationImpl(direction, stability, limit, nan_direction_hint, res, equal_ranges);
}
void ColumnTuple::getPermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const
2020-10-29 11:24:01 +00:00
{
getPermutationImpl(direction, stability, limit, nan_direction_hint, res, &collator);
2020-10-29 11:24:01 +00:00
}
void ColumnTuple::updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
2020-10-29 11:24:01 +00:00
{
updatePermutationImpl(direction, stability, limit, nan_direction_hint, res, equal_ranges, &collator);
2020-10-29 11:24:01 +00:00
}
/// Hands this column to the gatherer stream (`gatherer.gather(*this)`).
void ColumnTuple::gather(ColumnGathererStream & gatherer)
{
    auto & self = *this;
    gatherer.gather(self);
}
void ColumnTuple::reserve(size_t n)
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
getColumn(i).reserve(n);
}
void ColumnTuple::ensureOwnership()
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
getColumn(i).ensureOwnership();
}
size_t ColumnTuple::byteSize() const
{
size_t res = 0;
for (const auto & column : columns)
res += column->byteSize();
return res;
}
2021-01-02 22:58:10 +00:00
size_t ColumnTuple::byteSizeAt(size_t n) const
{
size_t res = 0;
for (const auto & column : columns)
res += column->byteSizeAt(n);
return res;
}
size_t ColumnTuple::allocatedBytes() const
{
size_t res = 0;
for (const auto & column : columns)
res += column->allocatedBytes();
return res;
}
void ColumnTuple::protect()
{
for (auto & column : columns)
column->protect();
}
/// Computes extremes independently for every element column and stores them as two Tuples:
/// `min` gets the per-element minimums and `max` the per-element maximums, in element order.
void ColumnTuple::getExtremes(Field & min, Field & max) const
{
    const size_t tuple_size = columns.size();

    Tuple min_tuple(tuple_size);
    Tuple max_tuple(tuple_size);

    for (size_t i = 0; i < tuple_size; ++i)
        columns[i]->getExtremes(min_tuple[i], max_tuple[i]);

    /// The local tuples are not used afterwards, so move them into the outputs
    /// instead of copying every contained Field.
    min = std::move(min_tuple);
    max = std::move(max_tuple);
}
2022-11-16 00:46:57 +00:00
/// Invokes `callback` once for each element column, in element order.
void ColumnTuple::forEachSubcolumn(ColumnCallback callback) const
{
    for (size_t i = 0, count = columns.size(); i != count; ++i)
        callback(columns[i]);
}
2022-11-16 00:46:57 +00:00
/// For each element column, in order: invokes `callback` on the element itself,
/// then lets the element apply the callback to its own subcolumns recursively.
void ColumnTuple::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
{
    for (size_t i = 0, count = columns.size(); i != count; ++i)
    {
        const auto & element = columns[i];
        callback(*element);
        element->forEachSubcolumnRecursively(callback);
    }
}
bool ColumnTuple::structureEquals(const IColumn & rhs) const
{
2020-04-22 06:22:14 +00:00
if (const auto * rhs_tuple = typeid_cast<const ColumnTuple *>(&rhs))
{
const size_t tuple_size = columns.size();
if (tuple_size != rhs_tuple->columns.size())
return false;
for (size_t i = 0; i < tuple_size; ++i)
if (!columns[i]->structureEquals(*rhs_tuple->columns[i]))
return false;
return true;
}
else
return false;
}
2020-10-29 11:24:01 +00:00
bool ColumnTuple::isCollationSupported() const
{
for (const auto & column : columns)
2020-10-29 11:24:01 +00:00
{
if (column->isCollationSupported())
return true;
}
return false;
}
/// Compresses every element column and returns a ColumnCompressed that records the row count,
/// the total byte size of the compressed elements, and a callback that rebuilds the tuple
/// from the decompressed elements.
ColumnPtr ColumnTuple::compress() const
{
    size_t byte_size = 0;
    Columns compressed;
    compressed.reserve(columns.size());

    for (const auto & column : columns)
    {
        auto compressed_column = column->compress();
        byte_size += compressed_column->byteSize();
        compressed.emplace_back(std::move(compressed_column));
    }

    return ColumnCompressed::create(size(), byte_size,
        [compressed = std::move(compressed)]
        {
            /// Decompress into a fresh vector rather than overwriting the captured columns:
            /// the callback then has no mutable state, so every invocation starts from the
            /// same compressed inputs and it never writes to data shared between calls.
            Columns decompressed;
            decompressed.reserve(compressed.size());
            for (const auto & column : compressed)
                decompressed.push_back(column->decompress());
            return ColumnTuple::create(decompressed);
        });
}
/// Delegates to the shared helper getRatioOfDefaultRowsImpl, instantiated for ColumnTuple.
double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
{
    const double ratio = getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
    return ratio;
}
/// Delegates to the shared helper getIndicesOfNonDefaultRowsImpl, instantiated for ColumnTuple.
void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
    getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
2022-05-06 14:44:00 +00:00
void ColumnTuple::finalize()
{
for (auto & column : columns)
column->finalize();
}
/// The tuple is finalized only if every element column is finalized
/// (a tuple with no elements reports true).
bool ColumnTuple::isFinalized() const
{
    for (const auto & element : columns)
    {
        if (!element->isFinalized())
            return false;
    }
    return true;
}
}